pubchem.protein Schema Extraction¶

This notebook demonstrates RDF schema extraction from the pubchem.protein SPARQL endpoint. It discovers VoID (Vocabulary of Interlinked Datasets) descriptions and generates JSON-LD as the source for all downstream outputs including frequency analysis and LinkML schemas.

Exports¶

  • JSON-LD Schema (primary output)
  • N-Quads RDF
  • VoID Graph for the dataset in its original source
  • Coverage report
  • LinkML Schema
  • Full parquet entity dataframe
In [1]:
# Dataset Configuration
import os

# Dataset parameters
endpoint_url = "https://idsm.elixir-czech.cz/sparql/endpoint/idsm"
dataset_name = "pubchem.protein"
void_iri = "http://rdf.ncbi.nlm.nih.gov/pubchem/protein"
graph_uri = "http://rdf.ncbi.nlm.nih.gov/pubchem/protein"

# Setup paths
working_path = os.path.abspath("")
exports_path = os.path.join(
    working_path, "..", "..", "docs", "data", "schema_extraction", dataset_name
)
os.makedirs(exports_path, exist_ok=True)
In [2]:
import logging
import sys

# Minimal notebook logger using existing dataset_name
logger = logging.getLogger(dataset_name or "notebook")
logger.setLevel(logging.DEBUG)  # Set to DEBUG to see SPARQL queries

# Also configure the rdfsolve.parser logger to see query details
parser_logger = logging.getLogger("rdfsolve.parser")
parser_logger.setLevel(logging.DEBUG)

# Avoid adding duplicate handlers if the cell is re-run
if not logger.handlers:
    fmt = logging.Formatter("%(asctime)s %(levelname)s %(name)s: %(message)s", "%Y-%m-%d %H:%M:%S")

    sh = logging.StreamHandler(sys.stdout)
    sh.setLevel(logging.DEBUG)  # Set to DEBUG to see all logs
    sh.setFormatter(fmt)
    logger.addHandler(sh)

    # Add the same handler to the parser logger
    parser_logger.addHandler(sh)

logger.info(f"Logging configured for {dataset_name}")
2025-12-02 10:11:06 INFO pubchem.protein: Logging configured for pubchem.protein
In [3]:
# Import libraries
import json

# Configure Plotly for HTML output
import plotly.io as pio
import plotly.offline as pyo
from IPython.display import Markdown, display

# Import rdfsolve API functions
from rdfsolve.api import (
    discover_void_graphs,
    generate_void_from_endpoint,
    load_parser_from_graph,
    retrieve_void_from_graphs,
)
from rdfsolve.sparql_helper import SparqlHelper

# Enable query collection to track all SPARQL queries executed
SparqlHelper.enable_query_collection()

# Set renderer to 'notebook' for Jupyter, but ensure HTML export works
pio.renderers.default = "notebook+plotly_mimetype"

# Initialize offline mode for Plotly
pyo.init_notebook_mode(connected=True)
In [4]:
# Pickle caching utilities
import os
import pickle


def save_cache(data, filename, cache_dir=None):
    """Save data to pickle cache."""
    if cache_dir is None:
        cache_dir = os.path.join(exports_path, "cache")
    os.makedirs(cache_dir, exist_ok=True)

    cache_path = os.path.join(cache_dir, f"{filename}.pkl")
    with open(cache_path, "wb") as f:
        pickle.dump(data, f)
    print(f"Cached data to: {cache_path}")
    return cache_path


def load_cache(filename, cache_dir=None):
    """Load data from pickle cache if it exists."""
    if cache_dir is None:
        cache_dir = os.path.join(exports_path, "cache")

    cache_path = os.path.join(cache_dir, f"{filename}.pkl")
    if os.path.exists(cache_path):
        with open(cache_path, "rb") as f:
            data = pickle.load(f)
        print(f"Loaded cached data from: {cache_path}")
        return data
    return None


def cache_exists(filename, cache_dir=None):
    """Check if cache file exists."""
    if cache_dir is None:
        cache_dir = os.path.join(exports_path, "cache")

    cache_path = os.path.join(cache_dir, f"{filename}.pkl")
    return os.path.exists(cache_path)
In [5]:
# Cache management utilities
def list_cache_files(cache_dir=None):
    """List all cache files."""
    if cache_dir is None:
        cache_dir = os.path.join(exports_path, "cache")

    if not os.path.exists(cache_dir):
        print("No cache directory found")
        return []

    cache_files = [f for f in os.listdir(cache_dir) if f.endswith(".pkl")]
    print(f"Cache directory: {cache_dir}")
    for f in cache_files:
        file_path = os.path.join(cache_dir, f)
        size_mb = os.path.getsize(file_path) / (1024 * 1024)
        print(f"  {f} ({size_mb:.2f} MB)")
    return cache_files


def clear_cache(filename=None, cache_dir=None):
    """Clear specific cache file or all cache."""
    if cache_dir is None:
        cache_dir = os.path.join(exports_path, "cache")

    if filename:
        cache_path = os.path.join(cache_dir, f"{filename}.pkl")
        if os.path.exists(cache_path):
            os.remove(cache_path)
            print(f"Removed cache: {filename}")
        else:
            print(f"Cache not found: {filename}")
    else:
        # Clear all cache files
        if os.path.exists(cache_dir):
            import shutil

            shutil.rmtree(cache_dir)
            print("Cleared all cache files")
        else:
            print("No cache directory to clear")


# Show current cache status
list_cache_files()
No cache directory found
Out[5]:
[]

Cache Control¶

Use these cells to manage cached data. When testing new code changes, you may want to clear relevant cache files to force re-computation.

In [6]:
# Clear specific cache files (uncomment lines as needed for testing)

# When testing new VoID discovery/generation:
# clear_cache(f"{dataset_name}_voidgraph")

# When testing JSON-LD generation (primary output):
# clear_cache(f"{dataset_name}_jsonld_schema")

# When testing frequency calculations:
# clear_cache(f"{dataset_name}_frequencies_basic")
# clear_cache(f"{dataset_name}_frequencies_with_instances")

# Clear everything:
clear_cache()

print("Cache control ready")
print("Note: VoID graph and JSON-LD are the primary caches")
No cache directory to clear
Cache control ready
Note: VoID graph and JSON-LD are the primary caches

Discover or get VoID Schema¶

In [7]:
# Discover or generate VoID schema with caching
cache_key = f"{dataset_name}_voidgraph"

# Try to load from cache first
void_graph = load_cache(cache_key)

if void_graph is None:
    print("VoID graph not found in cache, attempting discovery...")

    # Step 1: Try to discover existing VoID graphs
    discovery_result = discover_void_graphs(
        endpoint_url, graph_uris=[graph_uri] if graph_uri else None
    )

    found_graphs = discovery_result.get("found_graphs", [])
    partitions = discovery_result.get("partitions", [])

    if found_graphs and partitions:
        print(f"Found {len(found_graphs)} VoID graphs with {len(partitions)} partitions")
        # Build VoID graph directly from partition data (no CONSTRUCT query needed)
        void_graph = retrieve_void_from_graphs(
            endpoint_url,
            found_graphs,
            graph_uris=[graph_uri] if graph_uri else None,
            partitions=partitions,  # Pass partition data directly
        )

        # Save to file
        void_path = os.path.join(exports_path, f"{dataset_name}_existing_void.ttl")
        void_graph.serialize(destination=void_path, format="turtle")
        print(f"Built VoID graph from: {', '.join(found_graphs)}")
    else:
        print("No VoID graphs found, generating from queries...")
        # Step 2: Generate new VoID if none found
        void_graph = generate_void_from_endpoint(
            endpoint_url=endpoint_url,
            graph_uris=[graph_uri] if graph_uri else None,
            output_file=os.path.join(exports_path, f"{dataset_name}_generated_void.ttl"),
            counts=True,
            offset_limit_steps=300,
            exclude_graphs=True,
        )

    # Cache the VoID graph for future use
    save_cache(void_graph, cache_key)
    print(f"VoID graph cached with {len(void_graph)} triples")
else:
    print(f"Loaded VoID graph from cache ({len(void_graph)} triples)")

# Load parser from the VoID graph
vp = load_parser_from_graph(void_graph, graph_uris=[graph_uri] if graph_uri else None)
VoID graph not found in cache, attempting discovery...
2025-12-02 10:11:07 DEBUG rdfsolve.parser: Starting VoID partition discovery for https://idsm.elixir-czech.cz/sparql/endpoint/idsm
2025-12-02 10:11:07 INFO rdfsolve.parser: Discovering VoID partitions across all graphs
Query attempt 1/3 failed: 500 Server Error: 500 for url: https://idsm.elixir-czech.cz/sparql/endpoint/idsm?query=%0A++++++++PREFIX+void%3A+%3Chttp%3A%2F%2Frdfs.org%2Fns%2Fvoid%23%3E%0A++++++++PREFIX+void-ext%3A+%3Chttp%3A%2F%2Fldf.fi%2Fvoid-ext%23%3E%0A++++++++SELECT+DISTINCT+%3FsubjectClass+%3Fprop+%3FobjectClass+%3FobjectDatatype+%3Fg%0A++++++++WHERE+%7B%0A++++++++++GRAPH+%3Fg+%7B%0A++++++++++++%7B%0A++++++++++++++%3Fcp+void%3Aclass+%3FsubjectClass+%3B%0A++++++++++++++++++void%3ApropertyPartition+%3Fpp+.%0A++++++++++++++%3Fpp+void%3Aproperty+%3Fprop+.%0A++++++++++++++OPTIONAL+%7B%0A++++++++++++++++++%7B%0A++++++++++++++++++++++%3Fpp++void%3AclassPartition+%5B+void%3Aclass+%3FobjectClass+%5D+.%0A++++++++++++++++++%7D+UNION+%7B%0A++++++++++++++++++++++%3Fpp+void-ext%3AdatatypePartition+%5B+void-ext%3Adatatype+%3FobjectDatatype+%5D+.%0A++++++++++++++++++%7D%0A++++++++++++++%7D%0A++++++++++++%7D+UNION+%7B%0A++++++++++++++%3Fls+void%3AsubjectsTarget+%5B+void%3Aclass+%3FsubjectClass+%5D+%3B%0A++++++++++++++++++void%3AlinkPredicate+%3Fprop+%3B%0A++++++++++++++++++void%3AobjectsTarget+%5B+void%3Aclass+%3FobjectClass+%5D+.%0A++++++++++++%7D%0A++++++++++%7D%0A++++++++%7D%0A++++++++
Query attempt 2/3 failed: 500 Server Error: 500 for url: https://idsm.elixir-czech.cz/sparql/endpoint/idsm?query=%0A++++++++PREFIX+void%3A+%3Chttp%3A%2F%2Frdfs.org%2Fns%2Fvoid%23%3E%0A++++++++PREFIX+void-ext%3A+%3Chttp%3A%2F%2Fldf.fi%2Fvoid-ext%23%3E%0A++++++++SELECT+DISTINCT+%3FsubjectClass+%3Fprop+%3FobjectClass+%3FobjectDatatype+%3Fg%0A++++++++WHERE+%7B%0A++++++++++GRAPH+%3Fg+%7B%0A++++++++++++%7B%0A++++++++++++++%3Fcp+void%3Aclass+%3FsubjectClass+%3B%0A++++++++++++++++++void%3ApropertyPartition+%3Fpp+.%0A++++++++++++++%3Fpp+void%3Aproperty+%3Fprop+.%0A++++++++++++++OPTIONAL+%7B%0A++++++++++++++++++%7B%0A++++++++++++++++++++++%3Fpp++void%3AclassPartition+%5B+void%3Aclass+%3FobjectClass+%5D+.%0A++++++++++++++++++%7D+UNION+%7B%0A++++++++++++++++++++++%3Fpp+void-ext%3AdatatypePartition+%5B+void-ext%3Adatatype+%3FobjectDatatype+%5D+.%0A++++++++++++++++++%7D%0A++++++++++++++%7D%0A++++++++++++%7D+UNION+%7B%0A++++++++++++++%3Fls+void%3AsubjectsTarget+%5B+void%3Aclass+%3FsubjectClass+%5D+%3B%0A++++++++++++++++++void%3AlinkPredicate+%3Fprop+%3B%0A++++++++++++++++++void%3AobjectsTarget+%5B+void%3Aclass+%3FobjectClass+%5D+.%0A++++++++++++%7D%0A++++++++++%7D%0A++++++++%7D%0A++++++++
Query attempt 3/3 failed: 500 Server Error: 500 for url: https://idsm.elixir-czech.cz/sparql/endpoint/idsm?query=%0A++++++++PREFIX+void%3A+%3Chttp%3A%2F%2Frdfs.org%2Fns%2Fvoid%23%3E%0A++++++++PREFIX+void-ext%3A+%3Chttp%3A%2F%2Fldf.fi%2Fvoid-ext%23%3E%0A++++++++SELECT+DISTINCT+%3FsubjectClass+%3Fprop+%3FobjectClass+%3FobjectDatatype+%3Fg%0A++++++++WHERE+%7B%0A++++++++++GRAPH+%3Fg+%7B%0A++++++++++++%7B%0A++++++++++++++%3Fcp+void%3Aclass+%3FsubjectClass+%3B%0A++++++++++++++++++void%3ApropertyPartition+%3Fpp+.%0A++++++++++++++%3Fpp+void%3Aproperty+%3Fprop+.%0A++++++++++++++OPTIONAL+%7B%0A++++++++++++++++++%7B%0A++++++++++++++++++++++%3Fpp++void%3AclassPartition+%5B+void%3Aclass+%3FobjectClass+%5D+.%0A++++++++++++++++++%7D+UNION+%7B%0A++++++++++++++++++++++%3Fpp+void-ext%3AdatatypePartition+%5B+void-ext%3Adatatype+%3FobjectDatatype+%5D+.%0A++++++++++++++++++%7D%0A++++++++++++++%7D%0A++++++++++++%7D+UNION+%7B%0A++++++++++++++%3Fls+void%3AsubjectsTarget+%5B+void%3Aclass+%3FsubjectClass+%5D+%3B%0A++++++++++++++++++void%3AlinkPredicate+%3Fprop+%3B%0A++++++++++++++++++void%3AobjectsTarget+%5B+void%3Aclass+%3FobjectClass+%5D+.%0A++++++++++++%7D%0A++++++++++%7D%0A++++++++%7D%0A++++++++
SELECT failed after 3 tries
2025-12-02 10:11:12 INFO rdfsolve.parser: VoID discovery failed: Query failed after 3 attempts: 500 Server Error: 500 for url: https://idsm.elixir-czech.cz/sparql/endpoint/idsm?query=%0A++++++++PREFIX+void%3A+%3Chttp%3A%2F%2Frdfs.org%2Fns%2Fvoid%23%3E%0A++++++++PREFIX+void-ext%3A+%3Chttp%3A%2F%2Fldf.fi%2Fvoid-ext%23%3E%0A++++++++SELECT+DISTINCT+%3FsubjectClass+%3Fprop+%3FobjectClass+%3FobjectDatatype+%3Fg%0A++++++++WHERE+%7B%0A++++++++++GRAPH+%3Fg+%7B%0A++++++++++++%7B%0A++++++++++++++%3Fcp+void%3Aclass+%3FsubjectClass+%3B%0A++++++++++++++++++void%3ApropertyPartition+%3Fpp+.%0A++++++++++++++%3Fpp+void%3Aproperty+%3Fprop+.%0A++++++++++++++OPTIONAL+%7B%0A++++++++++++++++++%7B%0A++++++++++++++++++++++%3Fpp++void%3AclassPartition+%5B+void%3Aclass+%3FobjectClass+%5D+.%0A++++++++++++++++++%7D+UNION+%7B%0A++++++++++++++++++++++%3Fpp+void-ext%3AdatatypePartition+%5B+void-ext%3Adatatype+%3FobjectDatatype+%5D+.%0A++++++++++++++++++%7D%0A++++++++++++++%7D%0A++++++++++++%7D+UNION+%7B%0A++++++++++++++%3Fls+void%3AsubjectsTarget+%5B+void%3Aclass+%3FsubjectClass+%5D+%3B%0A++++++++++++++++++void%3AlinkPredicate+%3Fprop+%3B%0A++++++++++++++++++void%3AobjectsTarget+%5B+void%3Aclass+%3FobjectClass+%5D+.%0A++++++++++++%7D%0A++++++++++%7D%0A++++++++%7D%0A++++++++
2025-12-02 10:11:12 DEBUG rdfsolve.parser: Discovery exception: EndpointError: Query failed after 3 attempts: 500 Server Error: 500 for url: https://idsm.elixir-czech.cz/sparql/endpoint/idsm?query=%0A++++++++PREFIX+void%3A+%3Chttp%3A%2F%2Frdfs.org%2Fns%2Fvoid%23%3E%0A++++++++PREFIX+void-ext%3A+%3Chttp%3A%2F%2Fldf.fi%2Fvoid-ext%23%3E%0A++++++++SELECT+DISTINCT+%3FsubjectClass+%3Fprop+%3FobjectClass+%3FobjectDatatype+%3Fg%0A++++++++WHERE+%7B%0A++++++++++GRAPH+%3Fg+%7B%0A++++++++++++%7B%0A++++++++++++++%3Fcp+void%3Aclass+%3FsubjectClass+%3B%0A++++++++++++++++++void%3ApropertyPartition+%3Fpp+.%0A++++++++++++++%3Fpp+void%3Aproperty+%3Fprop+.%0A++++++++++++++OPTIONAL+%7B%0A++++++++++++++++++%7B%0A++++++++++++++++++++++%3Fpp++void%3AclassPartition+%5B+void%3Aclass+%3FobjectClass+%5D+.%0A++++++++++++++++++%7D+UNION+%7B%0A++++++++++++++++++++++%3Fpp+void-ext%3AdatatypePartition+%5B+void-ext%3Adatatype+%3FobjectDatatype+%5D+.%0A++++++++++++++++++%7D%0A++++++++++++++%7D%0A++++++++++++%7D+UNION+%7B%0A++++++++++++++%3Fls+void%3AsubjectsTarget+%5B+void%3Aclass+%3FsubjectClass+%5D+%3B%0A++++++++++++++++++void%3AlinkPredicate+%3Fprop+%3B%0A++++++++++++++++++void%3AobjectsTarget+%5B+void%3Aclass+%3FobjectClass+%5D+.%0A++++++++++++%7D%0A++++++++++%7D%0A++++++++%7D%0A++++++++
No VoID graphs found, generating from queries...
2025-12-02 10:11:19 INFO rdfsolve.parser: Successfully extracted 770 RDF triples
2025-12-02 10:11:19 INFO rdfsolve.parser: VoID description saved to /home/runner/work/rdfsolve/rdfsolve/notebooks/01_schema_extraction/../../docs/data/schema_extraction/pubchem.protein/pubchem.protein_generated_void.ttl
Cached data to: /home/runner/work/rdfsolve/rdfsolve/notebooks/01_schema_extraction/../../docs/data/schema_extraction/pubchem.protein/cache/pubchem.protein_voidgraph.pkl
VoID graph cached with 770 triples

Schema Discovery and Exports Workflow¶

Workflow Steps:¶

  1. VoID Discovery: Extract schema patterns from SPARQL endpoint VoID descriptions
  2. JSON-LD Generation: Convert to JSON-LD.
  3. Derived Outputs: All other formats are generated from the JSON-LD structure:
    • Frequencies: Schema pattern coverage analysis
    • LinkML: LinkML YAML used elsewhere for other features.
    • CSV/JSON: Tabular and structured data exports
    • RDF: N-Quads serialization for triplestore import
In [8]:
# Primary JSON-LD schema export and basic summary
cache_key = f"{dataset_name}_jsonld_schema"
jsonld_schema = load_cache(cache_key)

if jsonld_schema is None:
    print("Generating JSON-LD schema...")
    jsonld_schema = vp.to_jsonld(filter_void_admin_nodes=True)
    save_cache(jsonld_schema, cache_key)
else:
    print("Loaded JSON-LD schema from cache")

# Save JSON-LD schema file
jsonld_file = os.path.join(exports_path, f"{dataset_name}_schema.jsonld")
with open(jsonld_file, "w", encoding="utf-8") as f:
    json.dump(jsonld_schema, f, indent=2, ensure_ascii=False)

print(f"JSON-LD Schema saved to: {jsonld_file}")

# Display combined JSON-LD structure info and schema summary
if "@graph" in jsonld_schema:
    print("\nSchema Summary:")
    print(f"   • Prefixes: {len(jsonld_schema['@context'])}")
    print(f"   • Resources: {len(jsonld_schema['@graph'])}")

    # Show dataset metadata
    dataset_info = jsonld_schema["@graph"][0] if jsonld_schema["@graph"] else {}
    if dataset_info.get("@type") == "void:Dataset":
        print(f"   • Dataset: {dataset_info.get('dcterms:title', 'Unknown')}")
        print(f"   • Classes: {dataset_info.get('void:classes', 0)}")
        print(f"   • Properties: {dataset_info.get('void:properties', 0)}")
        print(f"   • Triples: {dataset_info.get('void:triples', 0)}")

# Get schema DataFrame and show sample
schema_df = vp.to_schema(filter_void_admin_nodes=True)
print(f"\nSchema Patterns Preview ({len(schema_df)} total):")
display(schema_df.head())
Generating JSON-LD schema...
Cached data to: /home/runner/work/rdfsolve/rdfsolve/notebooks/01_schema_extraction/../../docs/data/schema_extraction/pubchem.protein/cache/pubchem.protein_jsonld_schema.pkl
JSON-LD Schema saved to: /home/runner/work/rdfsolve/rdfsolve/notebooks/01_schema_extraction/../../docs/data/schema_extraction/pubchem.protein/pubchem.protein_schema.jsonld

Schema Summary:
   • Prefixes: 5
   • Resources: 202
Schema Patterns Preview (42218 total):
subject_class subject_uri property property_uri object_class object_uri
0 pr:000000001 http://purl.obolibrary.org/obo/PR_000000001 vocabulary:hasSimilarProtein http://rdf.ncbi.nlm.nih.gov/pubchem/vocabulary... pr:000000001 http://purl.obolibrary.org/obo/PR_000000001
1 pr:000000001 http://purl.obolibrary.org/obo/PR_000000001 vocabulary:hasSimilarProtein http://rdf.ncbi.nlm.nih.gov/pubchem/vocabulary... biopaxleve:Protein http://www.biopax.org/release/biopax-level3.ow...
2 pr:000000001 http://purl.obolibrary.org/obo/PR_000000001 vocabulary:hasSimilarProtein http://rdf.ncbi.nlm.nih.gov/pubchem/vocabulary... vocabulary:Protein http://rdf.ncbi.nlm.nih.gov/pubchem/vocabulary...
3 pr:000000001 http://purl.obolibrary.org/obo/PR_000000001 vocabulary:hasSimilarProtein http://rdf.ncbi.nlm.nih.gov/pubchem/vocabulary... pr:000000732 http://purl.obolibrary.org/obo/PR_000000732
4 pr:000000001 http://purl.obolibrary.org/obo/PR_000000001 vocabulary:hasSimilarProtein http://rdf.ncbi.nlm.nih.gov/pubchem/vocabulary... pr:000000785 http://purl.obolibrary.org/obo/PR_000000785

Schema Pattern Coverage Analysis¶

Calculate coverage ratios showing what percentage of entities use each relationship pattern.

In [9]:
# Schema pattern coverage analysis and export
cache_key = f"{dataset_name}_frequencies_basic"
cached_data = load_cache(cache_key)

if cached_data is None:
    print("Calculating schema pattern frequencies...")
    frequencies_df, _ = vp.count_schema_shape_frequencies(
        endpoint_url=endpoint_url,
        offset_limit_steps=300,
    )
    save_cache(frequencies_df, cache_key)
else:
    print("Loaded frequencies DataFrame from cache")
    frequencies_df = cached_data

# Export coverage analysis
frequencies_output_path = os.path.join(exports_path, f"{dataset_name}_pattern_coverage.csv")
exported_df = vp.export_schema_shape_frequencies(
    frequencies_df, output_file=frequencies_output_path
)

# Combined summary and sample
if not frequencies_df.empty:
    avg_coverage = frequencies_df["coverage_percent"].mean()
    high_coverage = (frequencies_df["coverage_percent"] > 50).sum()

    print("\nPattern Coverage Analysis:")
    print(f"   • Total patterns: {len(frequencies_df)}")
    print(f"   • Average coverage: {avg_coverage:.1f}%")
    print(f"   • High coverage (>50%): {high_coverage}")
    print(f"   • Exported to: {frequencies_output_path}")

    print("\nSample Coverage Data:")
    display(
        frequencies_df[["subject_class", "property", "object_class", "coverage_percent"]].head()
    )

    print("\nCoverage Statistics:")
    display(frequencies_df["coverage_percent"].describe())
else:
    print("No frequency data available")
Calculating schema pattern frequencies...
2025-12-02 10:11:36 INFO rdfsolve.parser: Using chunked pagination for entity counts (step size: 300)
INFO:rdfsolve.parser:Using chunked pagination for entity counts (step size: 300)
2025-12-02 10:11:37 DEBUG rdfsolve.parser: Chunked entity count: chunk 1, rows=300, total=300
DEBUG:rdfsolve.parser:Chunked entity count: chunk 1, rows=300, total=300
2025-12-02 10:11:49 DEBUG rdfsolve.parser: Chunked entity count: chunk 2, rows=300, total=600
DEBUG:rdfsolve.parser:Chunked entity count: chunk 2, rows=300, total=600
2025-12-02 10:12:00 DEBUG rdfsolve.parser: Chunked entity count: chunk 3, rows=300, total=900
DEBUG:rdfsolve.parser:Chunked entity count: chunk 3, rows=300, total=900
2025-12-02 10:12:11 DEBUG rdfsolve.parser: Chunked entity count: chunk 4, rows=300, total=1200
DEBUG:rdfsolve.parser:Chunked entity count: chunk 4, rows=300, total=1200
2025-12-02 10:12:23 DEBUG rdfsolve.parser: Chunked entity count: chunk 5, rows=300, total=1500
DEBUG:rdfsolve.parser:Chunked entity count: chunk 5, rows=300, total=1500
2025-12-02 10:12:35 DEBUG rdfsolve.parser: Chunked entity count: chunk 6, rows=300, total=1800
DEBUG:rdfsolve.parser:Chunked entity count: chunk 6, rows=300, total=1800
2025-12-02 10:12:46 DEBUG rdfsolve.parser: Chunked entity count: chunk 7, rows=300, total=2100
DEBUG:rdfsolve.parser:Chunked entity count: chunk 7, rows=300, total=2100
2025-12-02 10:12:57 DEBUG rdfsolve.parser: Chunked entity count: chunk 8, rows=300, total=2400
DEBUG:rdfsolve.parser:Chunked entity count: chunk 8, rows=300, total=2400
2025-12-02 10:13:09 DEBUG rdfsolve.parser: Chunked entity count: chunk 9, rows=300, total=2700
DEBUG:rdfsolve.parser:Chunked entity count: chunk 9, rows=300, total=2700
2025-12-02 10:13:20 DEBUG rdfsolve.parser: Chunked entity count: chunk 10, rows=300, total=3000
DEBUG:rdfsolve.parser:Chunked entity count: chunk 10, rows=300, total=3000
2025-12-02 10:13:32 DEBUG rdfsolve.parser: Chunked entity count: chunk 11, rows=300, total=3300
DEBUG:rdfsolve.parser:Chunked entity count: chunk 11, rows=300, total=3300
2025-12-02 10:13:43 DEBUG rdfsolve.parser: Chunked entity count: chunk 12, rows=300, total=3600
DEBUG:rdfsolve.parser:Chunked entity count: chunk 12, rows=300, total=3600
2025-12-02 10:13:55 DEBUG rdfsolve.parser: Chunked entity count: chunk 13, rows=300, total=3900
DEBUG:rdfsolve.parser:Chunked entity count: chunk 13, rows=300, total=3900
2025-12-02 10:14:06 DEBUG rdfsolve.parser: Chunked entity count: chunk 14, rows=300, total=4200
DEBUG:rdfsolve.parser:Chunked entity count: chunk 14, rows=300, total=4200
2025-12-02 10:14:17 DEBUG rdfsolve.parser: Chunked entity count: chunk 15, rows=300, total=4500
DEBUG:rdfsolve.parser:Chunked entity count: chunk 15, rows=300, total=4500
2025-12-02 10:14:29 DEBUG rdfsolve.parser: Chunked entity count: chunk 16, rows=300, total=4800
DEBUG:rdfsolve.parser:Chunked entity count: chunk 16, rows=300, total=4800
2025-12-02 10:14:40 DEBUG rdfsolve.parser: Chunked entity count: chunk 17, rows=300, total=5100
DEBUG:rdfsolve.parser:Chunked entity count: chunk 17, rows=300, total=5100
2025-12-02 10:14:52 DEBUG rdfsolve.parser: Chunked entity count: chunk 18, rows=300, total=5400
DEBUG:rdfsolve.parser:Chunked entity count: chunk 18, rows=300, total=5400
2025-12-02 10:15:03 DEBUG rdfsolve.parser: Chunked entity count: chunk 19, rows=300, total=5700
DEBUG:rdfsolve.parser:Chunked entity count: chunk 19, rows=300, total=5700
2025-12-02 10:15:14 DEBUG rdfsolve.parser: Chunked entity count: chunk 20, rows=300, total=6000
DEBUG:rdfsolve.parser:Chunked entity count: chunk 20, rows=300, total=6000
2025-12-02 10:15:26 DEBUG rdfsolve.parser: Chunked entity count: chunk 21, rows=300, total=6300
DEBUG:rdfsolve.parser:Chunked entity count: chunk 21, rows=300, total=6300
2025-12-02 10:15:38 DEBUG rdfsolve.parser: Chunked entity count: chunk 22, rows=300, total=6600
DEBUG:rdfsolve.parser:Chunked entity count: chunk 22, rows=300, total=6600
2025-12-02 10:15:49 DEBUG rdfsolve.parser: Chunked entity count: chunk 23, rows=300, total=6900
DEBUG:rdfsolve.parser:Chunked entity count: chunk 23, rows=300, total=6900
2025-12-02 10:16:00 DEBUG rdfsolve.parser: Chunked entity count: chunk 24, rows=300, total=7200
DEBUG:rdfsolve.parser:Chunked entity count: chunk 24, rows=300, total=7200
2025-12-02 10:16:12 DEBUG rdfsolve.parser: Chunked entity count: chunk 25, rows=300, total=7500
DEBUG:rdfsolve.parser:Chunked entity count: chunk 25, rows=300, total=7500
2025-12-02 10:16:23 DEBUG rdfsolve.parser: Chunked entity count: chunk 26, rows=300, total=7800
DEBUG:rdfsolve.parser:Chunked entity count: chunk 26, rows=300, total=7800
2025-12-02 10:16:35 DEBUG rdfsolve.parser: Chunked entity count: chunk 27, rows=300, total=8100
DEBUG:rdfsolve.parser:Chunked entity count: chunk 27, rows=300, total=8100
2025-12-02 10:16:46 DEBUG rdfsolve.parser: Chunked entity count: chunk 28, rows=300, total=8400
DEBUG:rdfsolve.parser:Chunked entity count: chunk 28, rows=300, total=8400
2025-12-02 10:16:57 DEBUG rdfsolve.parser: Chunked entity count: chunk 29, rows=300, total=8700
DEBUG:rdfsolve.parser:Chunked entity count: chunk 29, rows=300, total=8700
2025-12-02 10:17:09 DEBUG rdfsolve.parser: Chunked entity count: chunk 30, rows=300, total=9000
DEBUG:rdfsolve.parser:Chunked entity count: chunk 30, rows=300, total=9000
2025-12-02 10:17:20 DEBUG rdfsolve.parser: Chunked entity count: chunk 31, rows=300, total=9300
DEBUG:rdfsolve.parser:Chunked entity count: chunk 31, rows=300, total=9300
2025-12-02 10:17:32 DEBUG rdfsolve.parser: Chunked entity count: chunk 32, rows=300, total=9600
DEBUG:rdfsolve.parser:Chunked entity count: chunk 32, rows=300, total=9600
2025-12-02 10:17:43 DEBUG rdfsolve.parser: Chunked entity count: chunk 33, rows=300, total=9900
DEBUG:rdfsolve.parser:Chunked entity count: chunk 33, rows=300, total=9900
2025-12-02 10:17:55 DEBUG rdfsolve.parser: Chunked entity count: chunk 34, rows=300, total=10200
DEBUG:rdfsolve.parser:Chunked entity count: chunk 34, rows=300, total=10200
2025-12-02 10:18:06 DEBUG rdfsolve.parser: Chunked entity count: chunk 35, rows=300, total=10500
DEBUG:rdfsolve.parser:Chunked entity count: chunk 35, rows=300, total=10500
2025-12-02 10:18:17 DEBUG rdfsolve.parser: Chunked entity count: chunk 36, rows=300, total=10800
DEBUG:rdfsolve.parser:Chunked entity count: chunk 36, rows=300, total=10800
2025-12-02 10:18:29 DEBUG rdfsolve.parser: Chunked entity count: chunk 37, rows=300, total=11100
DEBUG:rdfsolve.parser:Chunked entity count: chunk 37, rows=300, total=11100
2025-12-02 10:18:40 DEBUG rdfsolve.parser: Chunked entity count: chunk 38, rows=300, total=11400
DEBUG:rdfsolve.parser:Chunked entity count: chunk 38, rows=300, total=11400
2025-12-02 10:18:52 DEBUG rdfsolve.parser: Chunked entity count: chunk 39, rows=300, total=11700
DEBUG:rdfsolve.parser:Chunked entity count: chunk 39, rows=300, total=11700
2025-12-02 10:19:03 DEBUG rdfsolve.parser: Chunked entity count: chunk 40, rows=300, total=12000
DEBUG:rdfsolve.parser:Chunked entity count: chunk 40, rows=300, total=12000
2025-12-02 10:19:15 DEBUG rdfsolve.parser: Chunked entity count: chunk 41, rows=300, total=12300
DEBUG:rdfsolve.parser:Chunked entity count: chunk 41, rows=300, total=12300
2025-12-02 10:19:26 DEBUG rdfsolve.parser: Chunked entity count: chunk 42, rows=300, total=12600
DEBUG:rdfsolve.parser:Chunked entity count: chunk 42, rows=300, total=12600
2025-12-02 10:19:38 DEBUG rdfsolve.parser: Chunked entity count: chunk 43, rows=300, total=12900
DEBUG:rdfsolve.parser:Chunked entity count: chunk 43, rows=300, total=12900
2025-12-02 10:19:49 DEBUG rdfsolve.parser: Chunked entity count: chunk 44, rows=300, total=13200
DEBUG:rdfsolve.parser:Chunked entity count: chunk 44, rows=300, total=13200
2025-12-02 10:20:01 DEBUG rdfsolve.parser: Chunked entity count: chunk 45, rows=300, total=13500
DEBUG:rdfsolve.parser:Chunked entity count: chunk 45, rows=300, total=13500
2025-12-02 10:20:12 DEBUG rdfsolve.parser: Chunked entity count: chunk 46, rows=300, total=13800
DEBUG:rdfsolve.parser:Chunked entity count: chunk 46, rows=300, total=13800
2025-12-02 10:20:24 DEBUG rdfsolve.parser: Chunked entity count: chunk 47, rows=300, total=14100
DEBUG:rdfsolve.parser:Chunked entity count: chunk 47, rows=300, total=14100
2025-12-02 10:20:35 DEBUG rdfsolve.parser: Chunked entity count: chunk 48, rows=300, total=14400
DEBUG:rdfsolve.parser:Chunked entity count: chunk 48, rows=300, total=14400
2025-12-02 10:20:47 DEBUG rdfsolve.parser: Chunked entity count: chunk 49, rows=300, total=14700
DEBUG:rdfsolve.parser:Chunked entity count: chunk 49, rows=300, total=14700
2025-12-02 10:20:58 DEBUG rdfsolve.parser: Chunked entity count: chunk 50, rows=300, total=15000
DEBUG:rdfsolve.parser:Chunked entity count: chunk 50, rows=300, total=15000
2025-12-02 10:21:10 DEBUG rdfsolve.parser: Chunked entity count: chunk 51, rows=300, total=15300
DEBUG:rdfsolve.parser:Chunked entity count: chunk 51, rows=300, total=15300
2025-12-02 10:21:21 DEBUG rdfsolve.parser: Chunked entity count: chunk 52, rows=300, total=15600
DEBUG:rdfsolve.parser:Chunked entity count: chunk 52, rows=300, total=15600
2025-12-02 10:21:33 DEBUG rdfsolve.parser: Chunked entity count: chunk 53, rows=300, total=15900
DEBUG:rdfsolve.parser:Chunked entity count: chunk 53, rows=300, total=15900
2025-12-02 10:21:44 DEBUG rdfsolve.parser: Chunked entity count: chunk 54, rows=300, total=16200
DEBUG:rdfsolve.parser:Chunked entity count: chunk 54, rows=300, total=16200
2025-12-02 10:21:56 DEBUG rdfsolve.parser: Chunked entity count: chunk 55, rows=300, total=16500
DEBUG:rdfsolve.parser:Chunked entity count: chunk 55, rows=300, total=16500
2025-12-02 10:22:07 DEBUG rdfsolve.parser: Chunked entity count: chunk 56, rows=300, total=16800
DEBUG:rdfsolve.parser:Chunked entity count: chunk 56, rows=300, total=16800
2025-12-02 10:22:18 DEBUG rdfsolve.parser: Chunked entity count: chunk 57, rows=300, total=17100
DEBUG:rdfsolve.parser:Chunked entity count: chunk 57, rows=300, total=17100
2025-12-02 10:22:30 DEBUG rdfsolve.parser: Chunked entity count: chunk 58, rows=300, total=17400
DEBUG:rdfsolve.parser:Chunked entity count: chunk 58, rows=300, total=17400
2025-12-02 10:22:42 DEBUG rdfsolve.parser: Chunked entity count: chunk 59, rows=300, total=17700
DEBUG:rdfsolve.parser:Chunked entity count: chunk 59, rows=300, total=17700
2025-12-02 10:22:53 DEBUG rdfsolve.parser: Chunked entity count: chunk 60, rows=300, total=18000
DEBUG:rdfsolve.parser:Chunked entity count: chunk 60, rows=300, total=18000
2025-12-02 10:23:04 DEBUG rdfsolve.parser: Chunked entity count: chunk 61, rows=300, total=18300
DEBUG:rdfsolve.parser:Chunked entity count: chunk 61, rows=300, total=18300
2025-12-02 10:23:16 DEBUG rdfsolve.parser: Chunked entity count: chunk 62, rows=300, total=18600
DEBUG:rdfsolve.parser:Chunked entity count: chunk 62, rows=300, total=18600
2025-12-02 10:23:27 DEBUG rdfsolve.parser: Chunked entity count: chunk 63, rows=300, total=18900
DEBUG:rdfsolve.parser:Chunked entity count: chunk 63, rows=300, total=18900
2025-12-02 10:23:39 DEBUG rdfsolve.parser: Chunked entity count: chunk 64, rows=300, total=19200
DEBUG:rdfsolve.parser:Chunked entity count: chunk 64, rows=300, total=19200
2025-12-02 10:23:50 DEBUG rdfsolve.parser: Chunked entity count: chunk 65, rows=300, total=19500
DEBUG:rdfsolve.parser:Chunked entity count: chunk 65, rows=300, total=19500
2025-12-02 10:24:02 DEBUG rdfsolve.parser: Chunked entity count: chunk 66, rows=300, total=19800
DEBUG:rdfsolve.parser:Chunked entity count: chunk 66, rows=300, total=19800
2025-12-02 10:24:13 DEBUG rdfsolve.parser: Chunked entity count: chunk 67, rows=300, total=20100
DEBUG:rdfsolve.parser:Chunked entity count: chunk 67, rows=300, total=20100
2025-12-02 10:24:25 DEBUG rdfsolve.parser: Chunked entity count: chunk 68, rows=300, total=20400
DEBUG:rdfsolve.parser:Chunked entity count: chunk 68, rows=300, total=20400
2025-12-02 10:24:37 DEBUG rdfsolve.parser: Chunked entity count: chunk 69, rows=300, total=20700
DEBUG:rdfsolve.parser:Chunked entity count: chunk 69, rows=300, total=20700
2025-12-02 10:24:48 DEBUG rdfsolve.parser: Chunked entity count: chunk 70, rows=300, total=21000
DEBUG:rdfsolve.parser:Chunked entity count: chunk 70, rows=300, total=21000
2025-12-02 10:24:59 DEBUG rdfsolve.parser: Chunked entity count: chunk 71, rows=300, total=21300
DEBUG:rdfsolve.parser:Chunked entity count: chunk 71, rows=300, total=21300
2025-12-02 10:25:11 DEBUG rdfsolve.parser: Chunked entity count: chunk 72, rows=300, total=21600
DEBUG:rdfsolve.parser:Chunked entity count: chunk 72, rows=300, total=21600
2025-12-02 10:25:23 DEBUG rdfsolve.parser: Chunked entity count: chunk 73, rows=300, total=21900
DEBUG:rdfsolve.parser:Chunked entity count: chunk 73, rows=300, total=21900
2025-12-02 10:25:34 DEBUG rdfsolve.parser: Chunked entity count: chunk 74, rows=300, total=22200
DEBUG:rdfsolve.parser:Chunked entity count: chunk 74, rows=300, total=22200
2025-12-02 10:25:45 DEBUG rdfsolve.parser: Chunked entity count: chunk 75, rows=300, total=22500
DEBUG:rdfsolve.parser:Chunked entity count: chunk 75, rows=300, total=22500
2025-12-02 10:25:57 DEBUG rdfsolve.parser: Chunked entity count: chunk 76, rows=300, total=22800
DEBUG:rdfsolve.parser:Chunked entity count: chunk 76, rows=300, total=22800
2025-12-02 10:26:08 DEBUG rdfsolve.parser: Chunked entity count: chunk 77, rows=300, total=23100
DEBUG:rdfsolve.parser:Chunked entity count: chunk 77, rows=300, total=23100
2025-12-02 10:26:20 DEBUG rdfsolve.parser: Chunked entity count: chunk 78, rows=300, total=23400
DEBUG:rdfsolve.parser:Chunked entity count: chunk 78, rows=300, total=23400
2025-12-02 10:26:32 DEBUG rdfsolve.parser: Chunked entity count: chunk 79, rows=300, total=23700
DEBUG:rdfsolve.parser:Chunked entity count: chunk 79, rows=300, total=23700
2025-12-02 10:26:43 DEBUG rdfsolve.parser: Chunked entity count: chunk 80, rows=300, total=24000
DEBUG:rdfsolve.parser:Chunked entity count: chunk 80, rows=300, total=24000
2025-12-02 10:26:55 DEBUG rdfsolve.parser: Chunked entity count: chunk 81, rows=300, total=24300
DEBUG:rdfsolve.parser:Chunked entity count: chunk 81, rows=300, total=24300
2025-12-02 10:27:07 DEBUG rdfsolve.parser: Chunked entity count: chunk 82, rows=300, total=24600
DEBUG:rdfsolve.parser:Chunked entity count: chunk 82, rows=300, total=24600
2025-12-02 10:27:18 DEBUG rdfsolve.parser: Chunked entity count: chunk 83, rows=300, total=24900
DEBUG:rdfsolve.parser:Chunked entity count: chunk 83, rows=300, total=24900
2025-12-02 10:27:30 DEBUG rdfsolve.parser: Chunked entity count: chunk 84, rows=300, total=25200
DEBUG:rdfsolve.parser:Chunked entity count: chunk 84, rows=300, total=25200
2025-12-02 10:27:42 DEBUG rdfsolve.parser: Chunked entity count: chunk 85, rows=300, total=25500
DEBUG:rdfsolve.parser:Chunked entity count: chunk 85, rows=300, total=25500
2025-12-02 10:27:53 DEBUG rdfsolve.parser: Chunked entity count: chunk 86, rows=300, total=25800
DEBUG:rdfsolve.parser:Chunked entity count: chunk 86, rows=300, total=25800
2025-12-02 10:28:05 DEBUG rdfsolve.parser: Chunked entity count: chunk 87, rows=300, total=26100
DEBUG:rdfsolve.parser:Chunked entity count: chunk 87, rows=300, total=26100
2025-12-02 10:28:16 DEBUG rdfsolve.parser: Chunked entity count: chunk 88, rows=300, total=26400
DEBUG:rdfsolve.parser:Chunked entity count: chunk 88, rows=300, total=26400
2025-12-02 10:28:28 DEBUG rdfsolve.parser: Chunked entity count: chunk 89, rows=300, total=26700
DEBUG:rdfsolve.parser:Chunked entity count: chunk 89, rows=300, total=26700
2025-12-02 10:28:39 DEBUG rdfsolve.parser: Chunked entity count: chunk 90, rows=300, total=27000
DEBUG:rdfsolve.parser:Chunked entity count: chunk 90, rows=300, total=27000
2025-12-02 10:28:51 DEBUG rdfsolve.parser: Chunked entity count: chunk 91, rows=300, total=27300
DEBUG:rdfsolve.parser:Chunked entity count: chunk 91, rows=300, total=27300
2025-12-02 10:29:02 DEBUG rdfsolve.parser: Chunked entity count: chunk 92, rows=300, total=27600
DEBUG:rdfsolve.parser:Chunked entity count: chunk 92, rows=300, total=27600
2025-12-02 10:29:14 DEBUG rdfsolve.parser: Chunked entity count: chunk 93, rows=300, total=27900
DEBUG:rdfsolve.parser:Chunked entity count: chunk 93, rows=300, total=27900
2025-12-02 10:29:26 DEBUG rdfsolve.parser: Chunked entity count: chunk 94, rows=300, total=28200
DEBUG:rdfsolve.parser:Chunked entity count: chunk 94, rows=300, total=28200
2025-12-02 10:29:37 DEBUG rdfsolve.parser: Chunked entity count: chunk 95, rows=300, total=28500
DEBUG:rdfsolve.parser:Chunked entity count: chunk 95, rows=300, total=28500
2025-12-02 10:29:49 DEBUG rdfsolve.parser: Chunked entity count: chunk 96, rows=300, total=28800
DEBUG:rdfsolve.parser:Chunked entity count: chunk 96, rows=300, total=28800
2025-12-02 10:30:01 DEBUG rdfsolve.parser: Chunked entity count: chunk 97, rows=300, total=29100
DEBUG:rdfsolve.parser:Chunked entity count: chunk 97, rows=300, total=29100
2025-12-02 10:30:12 DEBUG rdfsolve.parser: Chunked entity count: chunk 98, rows=300, total=29400
DEBUG:rdfsolve.parser:Chunked entity count: chunk 98, rows=300, total=29400
2025-12-02 10:30:23 DEBUG rdfsolve.parser: Chunked entity count: chunk 99, rows=300, total=29700
DEBUG:rdfsolve.parser:Chunked entity count: chunk 99, rows=300, total=29700
2025-12-02 10:30:35 DEBUG rdfsolve.parser: Chunked entity count: chunk 100, rows=300, total=30000
DEBUG:rdfsolve.parser:Chunked entity count: chunk 100, rows=300, total=30000
2025-12-02 10:30:47 DEBUG rdfsolve.parser: Chunked entity count: chunk 101, rows=300, total=30300
DEBUG:rdfsolve.parser:Chunked entity count: chunk 101, rows=300, total=30300
2025-12-02 10:30:58 DEBUG rdfsolve.parser: Chunked entity count: chunk 102, rows=300, total=30600
DEBUG:rdfsolve.parser:Chunked entity count: chunk 102, rows=300, total=30600
2025-12-02 10:31:10 DEBUG rdfsolve.parser: Chunked entity count: chunk 103, rows=300, total=30900
DEBUG:rdfsolve.parser:Chunked entity count: chunk 103, rows=300, total=30900
2025-12-02 10:31:21 DEBUG rdfsolve.parser: Chunked entity count: chunk 104, rows=300, total=31200
DEBUG:rdfsolve.parser:Chunked entity count: chunk 104, rows=300, total=31200
2025-12-02 10:31:33 DEBUG rdfsolve.parser: Chunked entity count: chunk 105, rows=300, total=31500
DEBUG:rdfsolve.parser:Chunked entity count: chunk 105, rows=300, total=31500
2025-12-02 10:31:45 DEBUG rdfsolve.parser: Chunked entity count: chunk 106, rows=300, total=31800
DEBUG:rdfsolve.parser:Chunked entity count: chunk 106, rows=300, total=31800
2025-12-02 10:31:56 DEBUG rdfsolve.parser: Chunked entity count: chunk 107, rows=300, total=32100
DEBUG:rdfsolve.parser:Chunked entity count: chunk 107, rows=300, total=32100
2025-12-02 10:32:08 DEBUG rdfsolve.parser: Chunked entity count: chunk 108, rows=300, total=32400
DEBUG:rdfsolve.parser:Chunked entity count: chunk 108, rows=300, total=32400
2025-12-02 10:32:19 DEBUG rdfsolve.parser: Chunked entity count: chunk 109, rows=300, total=32700
DEBUG:rdfsolve.parser:Chunked entity count: chunk 109, rows=300, total=32700
2025-12-02 10:32:31 DEBUG rdfsolve.parser: Chunked entity count: chunk 110, rows=300, total=33000
DEBUG:rdfsolve.parser:Chunked entity count: chunk 110, rows=300, total=33000
2025-12-02 10:32:43 DEBUG rdfsolve.parser: Chunked entity count: chunk 111, rows=300, total=33300
DEBUG:rdfsolve.parser:Chunked entity count: chunk 111, rows=300, total=33300
2025-12-02 10:32:54 DEBUG rdfsolve.parser: Chunked entity count: chunk 112, rows=300, total=33600
DEBUG:rdfsolve.parser:Chunked entity count: chunk 112, rows=300, total=33600
2025-12-02 10:33:06 DEBUG rdfsolve.parser: Chunked entity count: chunk 113, rows=300, total=33900
DEBUG:rdfsolve.parser:Chunked entity count: chunk 113, rows=300, total=33900
2025-12-02 10:33:18 DEBUG rdfsolve.parser: Chunked entity count: chunk 114, rows=300, total=34200
DEBUG:rdfsolve.parser:Chunked entity count: chunk 114, rows=300, total=34200
2025-12-02 10:33:29 DEBUG rdfsolve.parser: Chunked entity count: chunk 115, rows=300, total=34500
DEBUG:rdfsolve.parser:Chunked entity count: chunk 115, rows=300, total=34500
2025-12-02 10:33:41 DEBUG rdfsolve.parser: Chunked entity count: chunk 116, rows=300, total=34800
DEBUG:rdfsolve.parser:Chunked entity count: chunk 116, rows=300, total=34800
2025-12-02 10:33:52 DEBUG rdfsolve.parser: Chunked entity count: chunk 117, rows=300, total=35100
DEBUG:rdfsolve.parser:Chunked entity count: chunk 117, rows=300, total=35100
2025-12-02 10:34:04 DEBUG rdfsolve.parser: Chunked entity count: chunk 118, rows=300, total=35400
DEBUG:rdfsolve.parser:Chunked entity count: chunk 118, rows=300, total=35400
2025-12-02 10:34:15 DEBUG rdfsolve.parser: Chunked entity count: chunk 119, rows=300, total=35700
DEBUG:rdfsolve.parser:Chunked entity count: chunk 119, rows=300, total=35700
2025-12-02 10:34:27 DEBUG rdfsolve.parser: Chunked entity count: chunk 120, rows=300, total=36000
DEBUG:rdfsolve.parser:Chunked entity count: chunk 120, rows=300, total=36000
2025-12-02 10:34:39 DEBUG rdfsolve.parser: Chunked entity count: chunk 121, rows=300, total=36300
DEBUG:rdfsolve.parser:Chunked entity count: chunk 121, rows=300, total=36300
2025-12-02 10:34:50 DEBUG rdfsolve.parser: Chunked entity count: chunk 122, rows=300, total=36600
DEBUG:rdfsolve.parser:Chunked entity count: chunk 122, rows=300, total=36600
2025-12-02 10:35:02 DEBUG rdfsolve.parser: Chunked entity count: chunk 123, rows=300, total=36900
DEBUG:rdfsolve.parser:Chunked entity count: chunk 123, rows=300, total=36900
2025-12-02 10:35:14 DEBUG rdfsolve.parser: Chunked entity count: chunk 124, rows=300, total=37200
DEBUG:rdfsolve.parser:Chunked entity count: chunk 124, rows=300, total=37200
2025-12-02 10:35:25 DEBUG rdfsolve.parser: Chunked entity count: chunk 125, rows=300, total=37500
DEBUG:rdfsolve.parser:Chunked entity count: chunk 125, rows=300, total=37500
2025-12-02 10:35:37 DEBUG rdfsolve.parser: Chunked entity count: chunk 126, rows=300, total=37800
DEBUG:rdfsolve.parser:Chunked entity count: chunk 126, rows=300, total=37800
2025-12-02 10:35:48 DEBUG rdfsolve.parser: Chunked entity count: chunk 127, rows=300, total=38100
DEBUG:rdfsolve.parser:Chunked entity count: chunk 127, rows=300, total=38100
2025-12-02 10:36:00 DEBUG rdfsolve.parser: Chunked entity count: chunk 128, rows=300, total=38400
DEBUG:rdfsolve.parser:Chunked entity count: chunk 128, rows=300, total=38400
2025-12-02 10:36:11 DEBUG rdfsolve.parser: Chunked entity count: chunk 129, rows=300, total=38700
DEBUG:rdfsolve.parser:Chunked entity count: chunk 129, rows=300, total=38700
2025-12-02 10:36:23 DEBUG rdfsolve.parser: Chunked entity count: chunk 130, rows=300, total=39000
DEBUG:rdfsolve.parser:Chunked entity count: chunk 130, rows=300, total=39000
2025-12-02 10:36:35 DEBUG rdfsolve.parser: Chunked entity count: chunk 131, rows=300, total=39300
DEBUG:rdfsolve.parser:Chunked entity count: chunk 131, rows=300, total=39300
2025-12-02 10:36:46 DEBUG rdfsolve.parser: Chunked entity count: chunk 132, rows=300, total=39600
DEBUG:rdfsolve.parser:Chunked entity count: chunk 132, rows=300, total=39600
2025-12-02 10:36:58 DEBUG rdfsolve.parser: Chunked entity count: chunk 133, rows=300, total=39900
DEBUG:rdfsolve.parser:Chunked entity count: chunk 133, rows=300, total=39900
2025-12-02 10:37:09 DEBUG rdfsolve.parser: Chunked entity count: chunk 134, rows=300, total=40200
DEBUG:rdfsolve.parser:Chunked entity count: chunk 134, rows=300, total=40200
2025-12-02 10:37:21 DEBUG rdfsolve.parser: Chunked entity count: chunk 135, rows=300, total=40500
DEBUG:rdfsolve.parser:Chunked entity count: chunk 135, rows=300, total=40500
2025-12-02 10:37:33 DEBUG rdfsolve.parser: Chunked entity count: chunk 136, rows=300, total=40800
DEBUG:rdfsolve.parser:Chunked entity count: chunk 136, rows=300, total=40800
2025-12-02 10:37:44 DEBUG rdfsolve.parser: Chunked entity count: chunk 137, rows=300, total=41100
DEBUG:rdfsolve.parser:Chunked entity count: chunk 137, rows=300, total=41100
2025-12-02 10:37:56 DEBUG rdfsolve.parser: Chunked entity count: chunk 138, rows=300, total=41400
DEBUG:rdfsolve.parser:Chunked entity count: chunk 138, rows=300, total=41400
2025-12-02 10:38:08 DEBUG rdfsolve.parser: Chunked entity count: chunk 139, rows=300, total=41700
DEBUG:rdfsolve.parser:Chunked entity count: chunk 139, rows=300, total=41700
2025-12-02 10:38:19 DEBUG rdfsolve.parser: Chunked entity count: chunk 140, rows=300, total=42000
DEBUG:rdfsolve.parser:Chunked entity count: chunk 140, rows=300, total=42000
2025-12-02 10:38:31 DEBUG rdfsolve.parser: Chunked entity count: chunk 141, rows=300, total=42300
DEBUG:rdfsolve.parser:Chunked entity count: chunk 141, rows=300, total=42300
2025-12-02 10:38:43 DEBUG rdfsolve.parser: Chunked entity count: chunk 142, rows=300, total=42600
DEBUG:rdfsolve.parser:Chunked entity count: chunk 142, rows=300, total=42600
2025-12-02 10:38:54 DEBUG rdfsolve.parser: Chunked entity count: chunk 143, rows=300, total=42900
DEBUG:rdfsolve.parser:Chunked entity count: chunk 143, rows=300, total=42900
2025-12-02 10:39:06 DEBUG rdfsolve.parser: Chunked entity count: chunk 144, rows=300, total=43200
DEBUG:rdfsolve.parser:Chunked entity count: chunk 144, rows=300, total=43200
2025-12-02 10:39:18 DEBUG rdfsolve.parser: Chunked entity count: chunk 145, rows=300, total=43500
DEBUG:rdfsolve.parser:Chunked entity count: chunk 145, rows=300, total=43500
2025-12-02 10:39:29 DEBUG rdfsolve.parser: Chunked entity count: chunk 146, rows=300, total=43800
DEBUG:rdfsolve.parser:Chunked entity count: chunk 146, rows=300, total=43800
2025-12-02 10:39:41 DEBUG rdfsolve.parser: Chunked entity count: chunk 147, rows=300, total=44100
DEBUG:rdfsolve.parser:Chunked entity count: chunk 147, rows=300, total=44100
2025-12-02 10:39:53 DEBUG rdfsolve.parser: Chunked entity count: chunk 148, rows=300, total=44400
DEBUG:rdfsolve.parser:Chunked entity count: chunk 148, rows=300, total=44400
2025-12-02 10:40:04 DEBUG rdfsolve.parser: Chunked entity count: chunk 149, rows=300, total=44700
DEBUG:rdfsolve.parser:Chunked entity count: chunk 149, rows=300, total=44700
2025-12-02 10:40:16 DEBUG rdfsolve.parser: Chunked entity count: chunk 150, rows=300, total=45000
DEBUG:rdfsolve.parser:Chunked entity count: chunk 150, rows=300, total=45000
2025-12-02 10:40:27 DEBUG rdfsolve.parser: Chunked entity count: chunk 151, rows=300, total=45300
DEBUG:rdfsolve.parser:Chunked entity count: chunk 151, rows=300, total=45300
2025-12-02 10:40:39 DEBUG rdfsolve.parser: Chunked entity count: chunk 152, rows=300, total=45600
DEBUG:rdfsolve.parser:Chunked entity count: chunk 152, rows=300, total=45600
2025-12-02 10:40:51 DEBUG rdfsolve.parser: Chunked entity count: chunk 153, rows=300, total=45900
DEBUG:rdfsolve.parser:Chunked entity count: chunk 153, rows=300, total=45900
2025-12-02 10:41:02 DEBUG rdfsolve.parser: Chunked entity count: chunk 154, rows=300, total=46200
DEBUG:rdfsolve.parser:Chunked entity count: chunk 154, rows=300, total=46200
2025-12-02 10:41:14 DEBUG rdfsolve.parser: Chunked entity count: chunk 155, rows=300, total=46500
DEBUG:rdfsolve.parser:Chunked entity count: chunk 155, rows=300, total=46500
2025-12-02 10:41:26 DEBUG rdfsolve.parser: Chunked entity count: chunk 156, rows=300, total=46800
DEBUG:rdfsolve.parser:Chunked entity count: chunk 156, rows=300, total=46800
2025-12-02 10:41:38 DEBUG rdfsolve.parser: Chunked entity count: chunk 157, rows=300, total=47100
DEBUG:rdfsolve.parser:Chunked entity count: chunk 157, rows=300, total=47100
2025-12-02 10:41:49 DEBUG rdfsolve.parser: Chunked entity count: chunk 158, rows=300, total=47400
DEBUG:rdfsolve.parser:Chunked entity count: chunk 158, rows=300, total=47400
2025-12-02 10:42:01 DEBUG rdfsolve.parser: Chunked entity count: chunk 159, rows=300, total=47700
DEBUG:rdfsolve.parser:Chunked entity count: chunk 159, rows=300, total=47700
2025-12-02 10:42:13 DEBUG rdfsolve.parser: Chunked entity count: chunk 160, rows=300, total=48000
DEBUG:rdfsolve.parser:Chunked entity count: chunk 160, rows=300, total=48000
2025-12-02 10:42:24 DEBUG rdfsolve.parser: Chunked entity count: chunk 161, rows=300, total=48300
DEBUG:rdfsolve.parser:Chunked entity count: chunk 161, rows=300, total=48300
2025-12-02 10:42:36 DEBUG rdfsolve.parser: Chunked entity count: chunk 162, rows=300, total=48600
DEBUG:rdfsolve.parser:Chunked entity count: chunk 162, rows=300, total=48600
2025-12-02 10:42:48 DEBUG rdfsolve.parser: Chunked entity count: chunk 163, rows=300, total=48900
DEBUG:rdfsolve.parser:Chunked entity count: chunk 163, rows=300, total=48900
2025-12-02 10:42:59 DEBUG rdfsolve.parser: Chunked entity count: chunk 164, rows=300, total=49200
DEBUG:rdfsolve.parser:Chunked entity count: chunk 164, rows=300, total=49200
2025-12-02 10:43:11 DEBUG rdfsolve.parser: Chunked entity count: chunk 165, rows=300, total=49500
DEBUG:rdfsolve.parser:Chunked entity count: chunk 165, rows=300, total=49500
2025-12-02 10:43:23 DEBUG rdfsolve.parser: Chunked entity count: chunk 166, rows=300, total=49800
DEBUG:rdfsolve.parser:Chunked entity count: chunk 166, rows=300, total=49800
2025-12-02 10:43:34 DEBUG rdfsolve.parser: Chunked entity count: chunk 167, rows=300, total=50100
DEBUG:rdfsolve.parser:Chunked entity count: chunk 167, rows=300, total=50100
2025-12-02 10:43:46 DEBUG rdfsolve.parser: Chunked entity count: chunk 168, rows=300, total=50400
DEBUG:rdfsolve.parser:Chunked entity count: chunk 168, rows=300, total=50400
2025-12-02 10:43:58 DEBUG rdfsolve.parser: Chunked entity count: chunk 169, rows=300, total=50700
DEBUG:rdfsolve.parser:Chunked entity count: chunk 169, rows=300, total=50700
2025-12-02 10:44:09 DEBUG rdfsolve.parser: Chunked entity count: chunk 170, rows=300, total=51000
DEBUG:rdfsolve.parser:Chunked entity count: chunk 170, rows=300, total=51000
2025-12-02 10:44:21 DEBUG rdfsolve.parser: Chunked entity count: chunk 171, rows=300, total=51300
DEBUG:rdfsolve.parser:Chunked entity count: chunk 171, rows=300, total=51300
2025-12-02 10:44:33 DEBUG rdfsolve.parser: Chunked entity count: chunk 172, rows=300, total=51600
DEBUG:rdfsolve.parser:Chunked entity count: chunk 172, rows=300, total=51600
WARNING:rdfsolve.sparql_helper:Chunk query failed at offset 51600: HTTP 400: 400 Client Error: 400 for url: https://idsm.elixir-czech.cz/sparql/endpoint/idsm?query=%0A++++++++SELECT+%3Fclass+%28COUNT%28DISTINCT+%3Fs%29+AS+%3Ftotal%29+WHERE+%7B%0A++++++++++++GRAPH+%3Chttp%3A%2F%2Frdf.ncbi.nlm.nih.gov%2Fpubchem%2Fprotein%3E+%7B%0A++++++++++++++++%3Fs+a+%3Fclass+.%0A++++++++++++%7D%0A++++++++%7D%0A++++++++GROUP+BY+%3Fclass%0A++++++++ORDER+BY+DESC%28%3Ftotal%29%0A++++++++%0AOFFSET+51600%0ALIMIT+300
2025-12-02 10:45:16 INFO rdfsolve.parser: Chunked entity counting complete: 172 chunks, 51600 total results
INFO:rdfsolve.parser:Chunked entity counting complete: 172 chunks, 51600 total results
Cached data to: /home/runner/work/rdfsolve/rdfsolve/notebooks/01_schema_extraction/../../docs/data/schema_extraction/pubchem.protein/cache/pubchem.protein_frequencies_basic.pkl
Pattern Coverage Analysis:
   • Total patterns: 42218
   • Average coverage: 1.1%
   • High coverage (>50%): 418
   • Exported to: /home/runner/work/rdfsolve/rdfsolve/notebooks/01_schema_extraction/../../docs/data/schema_extraction/pubchem.protein/pubchem.protein_pattern_coverage.csv

Sample Coverage Data:
subject_class property object_class coverage_percent
21946 pr:P26897 vocabulary:hasSimilarProtein biopaxleve:Protein 100.0
21947 pr:P26897 vocabulary:hasSimilarProtein vocabulary:Protein 100.0
14005 pr:000032431 vocabulary:hasSimilarProtein vocabulary:Protein 100.0
32187 pr:Q4G050 vocabulary:hasSimilarProtein biopaxleve:Protein 100.0
32188 pr:Q4G050 vocabulary:hasSimilarProtein vocabulary:Protein 100.0
Coverage Statistics:
count    42218.000000
mean         1.081410
std          9.931853
min          0.000000
25%          0.000000
50%          0.000000
75%          0.000000
max        100.000000
Name: coverage_percent, dtype: float64

Schema Pattern Instance Collection¶

Collect actual subject and object IRI instances for each schema pattern. This provides detailed access to the specific entities participating in each relationship pattern.

In [10]:
# Collect both frequency data and actual instances with caching
cache_key = f"{dataset_name}_frequencies_with_instances"
cached_data = load_cache(cache_key)

if cached_data is None:
    print("Collecting frequency data and instances...")
    frequencies_with_instances_df, instances_df = vp.count_schema_shape_frequencies(
        endpoint_url=endpoint_url,
        # sample_limit=100,  # Limited sample for demonstration
        collect_instances=True,
        offset_limit_steps=300,
    )
    # Cache both DataFrames as a tuple
    save_cache((frequencies_with_instances_df, instances_df), cache_key)
else:
    print("Loaded frequencies and instances DataFrames from cache")
    frequencies_with_instances_df, instances_df = cached_data

# Display basic information about the data structure
print(f"Frequencies DataFrame: {len(frequencies_with_instances_df)} shapes")
if frequencies_with_instances_df is not None:
    print(
        f"Memory usage - Instances: {frequencies_with_instances_df.memory_usage(deep=True).sum() / 1024:.1f} KB"
    )
else:
    print("No instances collected")
Collecting frequency data and instances...
2025-12-02 11:33:22 INFO rdfsolve.parser: Using chunked pagination for entity counts (step size: 300)
INFO:rdfsolve.parser:Using chunked pagination for entity counts (step size: 300)
2025-12-02 11:33:24 DEBUG rdfsolve.parser: Chunked entity count: chunk 1, rows=300, total=300
DEBUG:rdfsolve.parser:Chunked entity count: chunk 1, rows=300, total=300
2025-12-02 11:33:35 DEBUG rdfsolve.parser: Chunked entity count: chunk 2, rows=300, total=600
DEBUG:rdfsolve.parser:Chunked entity count: chunk 2, rows=300, total=600
2025-12-02 11:33:46 DEBUG rdfsolve.parser: Chunked entity count: chunk 3, rows=300, total=900
DEBUG:rdfsolve.parser:Chunked entity count: chunk 3, rows=300, total=900
2025-12-02 11:33:58 DEBUG rdfsolve.parser: Chunked entity count: chunk 4, rows=300, total=1200
DEBUG:rdfsolve.parser:Chunked entity count: chunk 4, rows=300, total=1200
2025-12-02 11:34:09 DEBUG rdfsolve.parser: Chunked entity count: chunk 5, rows=300, total=1500
DEBUG:rdfsolve.parser:Chunked entity count: chunk 5, rows=300, total=1500
2025-12-02 11:34:20 DEBUG rdfsolve.parser: Chunked entity count: chunk 6, rows=300, total=1800
DEBUG:rdfsolve.parser:Chunked entity count: chunk 6, rows=300, total=1800
2025-12-02 11:34:32 DEBUG rdfsolve.parser: Chunked entity count: chunk 7, rows=300, total=2100
DEBUG:rdfsolve.parser:Chunked entity count: chunk 7, rows=300, total=2100
2025-12-02 11:34:43 DEBUG rdfsolve.parser: Chunked entity count: chunk 8, rows=300, total=2400
DEBUG:rdfsolve.parser:Chunked entity count: chunk 8, rows=300, total=2400
2025-12-02 11:34:54 DEBUG rdfsolve.parser: Chunked entity count: chunk 9, rows=300, total=2700
DEBUG:rdfsolve.parser:Chunked entity count: chunk 9, rows=300, total=2700
2025-12-02 11:35:05 DEBUG rdfsolve.parser: Chunked entity count: chunk 10, rows=300, total=3000
DEBUG:rdfsolve.parser:Chunked entity count: chunk 10, rows=300, total=3000
2025-12-02 11:35:17 DEBUG rdfsolve.parser: Chunked entity count: chunk 11, rows=300, total=3300
DEBUG:rdfsolve.parser:Chunked entity count: chunk 11, rows=300, total=3300
2025-12-02 11:35:28 DEBUG rdfsolve.parser: Chunked entity count: chunk 12, rows=300, total=3600
DEBUG:rdfsolve.parser:Chunked entity count: chunk 12, rows=300, total=3600
2025-12-02 11:35:40 DEBUG rdfsolve.parser: Chunked entity count: chunk 13, rows=300, total=3900
DEBUG:rdfsolve.parser:Chunked entity count: chunk 13, rows=300, total=3900
2025-12-02 11:35:51 DEBUG rdfsolve.parser: Chunked entity count: chunk 14, rows=300, total=4200
DEBUG:rdfsolve.parser:Chunked entity count: chunk 14, rows=300, total=4200
2025-12-02 11:36:02 DEBUG rdfsolve.parser: Chunked entity count: chunk 15, rows=300, total=4500
DEBUG:rdfsolve.parser:Chunked entity count: chunk 15, rows=300, total=4500
2025-12-02 11:36:14 DEBUG rdfsolve.parser: Chunked entity count: chunk 16, rows=300, total=4800
DEBUG:rdfsolve.parser:Chunked entity count: chunk 16, rows=300, total=4800
2025-12-02 11:36:25 DEBUG rdfsolve.parser: Chunked entity count: chunk 17, rows=300, total=5100
DEBUG:rdfsolve.parser:Chunked entity count: chunk 17, rows=300, total=5100
2025-12-02 11:36:37 DEBUG rdfsolve.parser: Chunked entity count: chunk 18, rows=300, total=5400
DEBUG:rdfsolve.parser:Chunked entity count: chunk 18, rows=300, total=5400
2025-12-02 11:36:48 DEBUG rdfsolve.parser: Chunked entity count: chunk 19, rows=300, total=5700
DEBUG:rdfsolve.parser:Chunked entity count: chunk 19, rows=300, total=5700
2025-12-02 11:36:59 DEBUG rdfsolve.parser: Chunked entity count: chunk 20, rows=300, total=6000
DEBUG:rdfsolve.parser:Chunked entity count: chunk 20, rows=300, total=6000
2025-12-02 11:37:11 DEBUG rdfsolve.parser: Chunked entity count: chunk 21, rows=300, total=6300
DEBUG:rdfsolve.parser:Chunked entity count: chunk 21, rows=300, total=6300
2025-12-02 11:37:22 DEBUG rdfsolve.parser: Chunked entity count: chunk 22, rows=300, total=6600
DEBUG:rdfsolve.parser:Chunked entity count: chunk 22, rows=300, total=6600
2025-12-02 11:37:33 DEBUG rdfsolve.parser: Chunked entity count: chunk 23, rows=300, total=6900
DEBUG:rdfsolve.parser:Chunked entity count: chunk 23, rows=300, total=6900
2025-12-02 11:37:45 DEBUG rdfsolve.parser: Chunked entity count: chunk 24, rows=300, total=7200
DEBUG:rdfsolve.parser:Chunked entity count: chunk 24, rows=300, total=7200
2025-12-02 11:37:56 DEBUG rdfsolve.parser: Chunked entity count: chunk 25, rows=300, total=7500
DEBUG:rdfsolve.parser:Chunked entity count: chunk 25, rows=300, total=7500
2025-12-02 11:38:08 DEBUG rdfsolve.parser: Chunked entity count: chunk 26, rows=300, total=7800
DEBUG:rdfsolve.parser:Chunked entity count: chunk 26, rows=300, total=7800
2025-12-02 11:38:19 DEBUG rdfsolve.parser: Chunked entity count: chunk 27, rows=300, total=8100
DEBUG:rdfsolve.parser:Chunked entity count: chunk 27, rows=300, total=8100
2025-12-02 11:38:31 DEBUG rdfsolve.parser: Chunked entity count: chunk 28, rows=300, total=8400
DEBUG:rdfsolve.parser:Chunked entity count: chunk 28, rows=300, total=8400
2025-12-02 11:38:42 DEBUG rdfsolve.parser: Chunked entity count: chunk 29, rows=300, total=8700
DEBUG:rdfsolve.parser:Chunked entity count: chunk 29, rows=300, total=8700
2025-12-02 11:38:54 DEBUG rdfsolve.parser: Chunked entity count: chunk 30, rows=300, total=9000
DEBUG:rdfsolve.parser:Chunked entity count: chunk 30, rows=300, total=9000
2025-12-02 11:39:05 DEBUG rdfsolve.parser: Chunked entity count: chunk 31, rows=300, total=9300
DEBUG:rdfsolve.parser:Chunked entity count: chunk 31, rows=300, total=9300
2025-12-02 11:39:16 DEBUG rdfsolve.parser: Chunked entity count: chunk 32, rows=300, total=9600
DEBUG:rdfsolve.parser:Chunked entity count: chunk 32, rows=300, total=9600
2025-12-02 11:39:28 DEBUG rdfsolve.parser: Chunked entity count: chunk 33, rows=300, total=9900
DEBUG:rdfsolve.parser:Chunked entity count: chunk 33, rows=300, total=9900
2025-12-02 11:39:39 DEBUG rdfsolve.parser: Chunked entity count: chunk 34, rows=300, total=10200
DEBUG:rdfsolve.parser:Chunked entity count: chunk 34, rows=300, total=10200
2025-12-02 11:39:51 DEBUG rdfsolve.parser: Chunked entity count: chunk 35, rows=300, total=10500
DEBUG:rdfsolve.parser:Chunked entity count: chunk 35, rows=300, total=10500
2025-12-02 11:40:02 DEBUG rdfsolve.parser: Chunked entity count: chunk 36, rows=300, total=10800
DEBUG:rdfsolve.parser:Chunked entity count: chunk 36, rows=300, total=10800
2025-12-02 11:40:14 DEBUG rdfsolve.parser: Chunked entity count: chunk 37, rows=300, total=11100
DEBUG:rdfsolve.parser:Chunked entity count: chunk 37, rows=300, total=11100
2025-12-02 11:40:25 DEBUG rdfsolve.parser: Chunked entity count: chunk 38, rows=300, total=11400
DEBUG:rdfsolve.parser:Chunked entity count: chunk 38, rows=300, total=11400
2025-12-02 11:40:36 DEBUG rdfsolve.parser: Chunked entity count: chunk 39, rows=300, total=11700
DEBUG:rdfsolve.parser:Chunked entity count: chunk 39, rows=300, total=11700
2025-12-02 11:40:48 DEBUG rdfsolve.parser: Chunked entity count: chunk 40, rows=300, total=12000
DEBUG:rdfsolve.parser:Chunked entity count: chunk 40, rows=300, total=12000
2025-12-02 11:40:59 DEBUG rdfsolve.parser: Chunked entity count: chunk 41, rows=300, total=12300
DEBUG:rdfsolve.parser:Chunked entity count: chunk 41, rows=300, total=12300
2025-12-02 11:41:11 DEBUG rdfsolve.parser: Chunked entity count: chunk 42, rows=300, total=12600
DEBUG:rdfsolve.parser:Chunked entity count: chunk 42, rows=300, total=12600
2025-12-02 11:41:22 DEBUG rdfsolve.parser: Chunked entity count: chunk 43, rows=300, total=12900
DEBUG:rdfsolve.parser:Chunked entity count: chunk 43, rows=300, total=12900
2025-12-02 11:41:34 DEBUG rdfsolve.parser: Chunked entity count: chunk 44, rows=300, total=13200
DEBUG:rdfsolve.parser:Chunked entity count: chunk 44, rows=300, total=13200
2025-12-02 11:41:45 DEBUG rdfsolve.parser: Chunked entity count: chunk 45, rows=300, total=13500
DEBUG:rdfsolve.parser:Chunked entity count: chunk 45, rows=300, total=13500
2025-12-02 11:41:57 DEBUG rdfsolve.parser: Chunked entity count: chunk 46, rows=300, total=13800
DEBUG:rdfsolve.parser:Chunked entity count: chunk 46, rows=300, total=13800
2025-12-02 11:42:08 DEBUG rdfsolve.parser: Chunked entity count: chunk 47, rows=300, total=14100
DEBUG:rdfsolve.parser:Chunked entity count: chunk 47, rows=300, total=14100
2025-12-02 11:42:19 DEBUG rdfsolve.parser: Chunked entity count: chunk 48, rows=300, total=14400
DEBUG:rdfsolve.parser:Chunked entity count: chunk 48, rows=300, total=14400
2025-12-02 11:42:31 DEBUG rdfsolve.parser: Chunked entity count: chunk 49, rows=300, total=14700
DEBUG:rdfsolve.parser:Chunked entity count: chunk 49, rows=300, total=14700
2025-12-02 11:42:42 DEBUG rdfsolve.parser: Chunked entity count: chunk 50, rows=300, total=15000
DEBUG:rdfsolve.parser:Chunked entity count: chunk 50, rows=300, total=15000
2025-12-02 11:42:54 DEBUG rdfsolve.parser: Chunked entity count: chunk 51, rows=300, total=15300
DEBUG:rdfsolve.parser:Chunked entity count: chunk 51, rows=300, total=15300
2025-12-02 11:43:05 DEBUG rdfsolve.parser: Chunked entity count: chunk 52, rows=300, total=15600
DEBUG:rdfsolve.parser:Chunked entity count: chunk 52, rows=300, total=15600
2025-12-02 11:43:17 DEBUG rdfsolve.parser: Chunked entity count: chunk 53, rows=300, total=15900
DEBUG:rdfsolve.parser:Chunked entity count: chunk 53, rows=300, total=15900
2025-12-02 11:43:28 DEBUG rdfsolve.parser: Chunked entity count: chunk 54, rows=300, total=16200
DEBUG:rdfsolve.parser:Chunked entity count: chunk 54, rows=300, total=16200
2025-12-02 11:43:40 DEBUG rdfsolve.parser: Chunked entity count: chunk 55, rows=300, total=16500
DEBUG:rdfsolve.parser:Chunked entity count: chunk 55, rows=300, total=16500
2025-12-02 11:43:51 DEBUG rdfsolve.parser: Chunked entity count: chunk 56, rows=300, total=16800
DEBUG:rdfsolve.parser:Chunked entity count: chunk 56, rows=300, total=16800
2025-12-02 11:44:03 DEBUG rdfsolve.parser: Chunked entity count: chunk 57, rows=300, total=17100
DEBUG:rdfsolve.parser:Chunked entity count: chunk 57, rows=300, total=17100
2025-12-02 11:44:14 DEBUG rdfsolve.parser: Chunked entity count: chunk 58, rows=300, total=17400
DEBUG:rdfsolve.parser:Chunked entity count: chunk 58, rows=300, total=17400
2025-12-02 11:44:25 DEBUG rdfsolve.parser: Chunked entity count: chunk 59, rows=300, total=17700
DEBUG:rdfsolve.parser:Chunked entity count: chunk 59, rows=300, total=17700
2025-12-02 11:44:37 DEBUG rdfsolve.parser: Chunked entity count: chunk 60, rows=300, total=18000
DEBUG:rdfsolve.parser:Chunked entity count: chunk 60, rows=300, total=18000
2025-12-02 11:44:48 DEBUG rdfsolve.parser: Chunked entity count: chunk 61, rows=300, total=18300
DEBUG:rdfsolve.parser:Chunked entity count: chunk 61, rows=300, total=18300
2025-12-02 11:45:00 DEBUG rdfsolve.parser: Chunked entity count: chunk 62, rows=300, total=18600
DEBUG:rdfsolve.parser:Chunked entity count: chunk 62, rows=300, total=18600
2025-12-02 11:45:12 DEBUG rdfsolve.parser: Chunked entity count: chunk 63, rows=300, total=18900
DEBUG:rdfsolve.parser:Chunked entity count: chunk 63, rows=300, total=18900
2025-12-02 11:45:23 DEBUG rdfsolve.parser: Chunked entity count: chunk 64, rows=300, total=19200
DEBUG:rdfsolve.parser:Chunked entity count: chunk 64, rows=300, total=19200
2025-12-02 11:45:35 DEBUG rdfsolve.parser: Chunked entity count: chunk 65, rows=300, total=19500
DEBUG:rdfsolve.parser:Chunked entity count: chunk 65, rows=300, total=19500
2025-12-02 11:45:46 DEBUG rdfsolve.parser: Chunked entity count: chunk 66, rows=300, total=19800
DEBUG:rdfsolve.parser:Chunked entity count: chunk 66, rows=300, total=19800
2025-12-02 11:45:57 DEBUG rdfsolve.parser: Chunked entity count: chunk 67, rows=300, total=20100
DEBUG:rdfsolve.parser:Chunked entity count: chunk 67, rows=300, total=20100
2025-12-02 11:46:09 DEBUG rdfsolve.parser: Chunked entity count: chunk 68, rows=300, total=20400
DEBUG:rdfsolve.parser:Chunked entity count: chunk 68, rows=300, total=20400
2025-12-02 11:46:20 DEBUG rdfsolve.parser: Chunked entity count: chunk 69, rows=300, total=20700
DEBUG:rdfsolve.parser:Chunked entity count: chunk 69, rows=300, total=20700
2025-12-02 11:46:32 DEBUG rdfsolve.parser: Chunked entity count: chunk 70, rows=300, total=21000
DEBUG:rdfsolve.parser:Chunked entity count: chunk 70, rows=300, total=21000
2025-12-02 11:46:43 DEBUG rdfsolve.parser: Chunked entity count: chunk 71, rows=300, total=21300
DEBUG:rdfsolve.parser:Chunked entity count: chunk 71, rows=300, total=21300
2025-12-02 11:46:55 DEBUG rdfsolve.parser: Chunked entity count: chunk 72, rows=300, total=21600
DEBUG:rdfsolve.parser:Chunked entity count: chunk 72, rows=300, total=21600
2025-12-02 11:47:06 DEBUG rdfsolve.parser: Chunked entity count: chunk 73, rows=300, total=21900
DEBUG:rdfsolve.parser:Chunked entity count: chunk 73, rows=300, total=21900
2025-12-02 11:47:18 DEBUG rdfsolve.parser: Chunked entity count: chunk 74, rows=300, total=22200
DEBUG:rdfsolve.parser:Chunked entity count: chunk 74, rows=300, total=22200
2025-12-02 11:47:29 DEBUG rdfsolve.parser: Chunked entity count: chunk 75, rows=300, total=22500
DEBUG:rdfsolve.parser:Chunked entity count: chunk 75, rows=300, total=22500
2025-12-02 11:47:41 DEBUG rdfsolve.parser: Chunked entity count: chunk 76, rows=300, total=22800
DEBUG:rdfsolve.parser:Chunked entity count: chunk 76, rows=300, total=22800
2025-12-02 11:47:52 DEBUG rdfsolve.parser: Chunked entity count: chunk 77, rows=300, total=23100
DEBUG:rdfsolve.parser:Chunked entity count: chunk 77, rows=300, total=23100
2025-12-02 11:48:04 DEBUG rdfsolve.parser: Chunked entity count: chunk 78, rows=300, total=23400
DEBUG:rdfsolve.parser:Chunked entity count: chunk 78, rows=300, total=23400
2025-12-02 11:48:16 DEBUG rdfsolve.parser: Chunked entity count: chunk 79, rows=300, total=23700
DEBUG:rdfsolve.parser:Chunked entity count: chunk 79, rows=300, total=23700
2025-12-02 11:48:27 DEBUG rdfsolve.parser: Chunked entity count: chunk 80, rows=300, total=24000
DEBUG:rdfsolve.parser:Chunked entity count: chunk 80, rows=300, total=24000
2025-12-02 11:48:39 DEBUG rdfsolve.parser: Chunked entity count: chunk 81, rows=300, total=24300
DEBUG:rdfsolve.parser:Chunked entity count: chunk 81, rows=300, total=24300
2025-12-02 11:48:50 DEBUG rdfsolve.parser: Chunked entity count: chunk 82, rows=300, total=24600
DEBUG:rdfsolve.parser:Chunked entity count: chunk 82, rows=300, total=24600
2025-12-02 11:49:01 DEBUG rdfsolve.parser: Chunked entity count: chunk 83, rows=300, total=24900
DEBUG:rdfsolve.parser:Chunked entity count: chunk 83, rows=300, total=24900
2025-12-02 11:49:13 DEBUG rdfsolve.parser: Chunked entity count: chunk 84, rows=300, total=25200
DEBUG:rdfsolve.parser:Chunked entity count: chunk 84, rows=300, total=25200
2025-12-02 11:49:24 DEBUG rdfsolve.parser: Chunked entity count: chunk 85, rows=300, total=25500
DEBUG:rdfsolve.parser:Chunked entity count: chunk 85, rows=300, total=25500
2025-12-02 11:49:36 DEBUG rdfsolve.parser: Chunked entity count: chunk 86, rows=300, total=25800
DEBUG:rdfsolve.parser:Chunked entity count: chunk 86, rows=300, total=25800
2025-12-02 11:49:47 DEBUG rdfsolve.parser: Chunked entity count: chunk 87, rows=300, total=26100
DEBUG:rdfsolve.parser:Chunked entity count: chunk 87, rows=300, total=26100
2025-12-02 11:49:59 DEBUG rdfsolve.parser: Chunked entity count: chunk 88, rows=300, total=26400
DEBUG:rdfsolve.parser:Chunked entity count: chunk 88, rows=300, total=26400
2025-12-02 11:50:11 DEBUG rdfsolve.parser: Chunked entity count: chunk 89, rows=300, total=26700
DEBUG:rdfsolve.parser:Chunked entity count: chunk 89, rows=300, total=26700
2025-12-02 11:50:22 DEBUG rdfsolve.parser: Chunked entity count: chunk 90, rows=300, total=27000
DEBUG:rdfsolve.parser:Chunked entity count: chunk 90, rows=300, total=27000
2025-12-02 11:50:34 DEBUG rdfsolve.parser: Chunked entity count: chunk 91, rows=300, total=27300
DEBUG:rdfsolve.parser:Chunked entity count: chunk 91, rows=300, total=27300
2025-12-02 11:50:45 DEBUG rdfsolve.parser: Chunked entity count: chunk 92, rows=300, total=27600
DEBUG:rdfsolve.parser:Chunked entity count: chunk 92, rows=300, total=27600
2025-12-02 11:50:57 DEBUG rdfsolve.parser: Chunked entity count: chunk 93, rows=300, total=27900
DEBUG:rdfsolve.parser:Chunked entity count: chunk 93, rows=300, total=27900
2025-12-02 11:51:08 DEBUG rdfsolve.parser: Chunked entity count: chunk 94, rows=300, total=28200
DEBUG:rdfsolve.parser:Chunked entity count: chunk 94, rows=300, total=28200
2025-12-02 11:51:20 DEBUG rdfsolve.parser: Chunked entity count: chunk 95, rows=300, total=28500
DEBUG:rdfsolve.parser:Chunked entity count: chunk 95, rows=300, total=28500
2025-12-02 11:51:31 DEBUG rdfsolve.parser: Chunked entity count: chunk 96, rows=300, total=28800
DEBUG:rdfsolve.parser:Chunked entity count: chunk 96, rows=300, total=28800
2025-12-02 11:51:43 DEBUG rdfsolve.parser: Chunked entity count: chunk 97, rows=300, total=29100
DEBUG:rdfsolve.parser:Chunked entity count: chunk 97, rows=300, total=29100
2025-12-02 11:51:54 DEBUG rdfsolve.parser: Chunked entity count: chunk 98, rows=300, total=29400
DEBUG:rdfsolve.parser:Chunked entity count: chunk 98, rows=300, total=29400
2025-12-02 11:52:06 DEBUG rdfsolve.parser: Chunked entity count: chunk 99, rows=300, total=29700
DEBUG:rdfsolve.parser:Chunked entity count: chunk 99, rows=300, total=29700
2025-12-02 11:52:17 DEBUG rdfsolve.parser: Chunked entity count: chunk 100, rows=300, total=30000
DEBUG:rdfsolve.parser:Chunked entity count: chunk 100, rows=300, total=30000
2025-12-02 11:52:29 DEBUG rdfsolve.parser: Chunked entity count: chunk 101, rows=300, total=30300
DEBUG:rdfsolve.parser:Chunked entity count: chunk 101, rows=300, total=30300
2025-12-02 11:52:40 DEBUG rdfsolve.parser: Chunked entity count: chunk 102, rows=300, total=30600
DEBUG:rdfsolve.parser:Chunked entity count: chunk 102, rows=300, total=30600
2025-12-02 11:52:52 DEBUG rdfsolve.parser: Chunked entity count: chunk 103, rows=300, total=30900
DEBUG:rdfsolve.parser:Chunked entity count: chunk 103, rows=300, total=30900
2025-12-02 11:53:04 DEBUG rdfsolve.parser: Chunked entity count: chunk 104, rows=300, total=31200
DEBUG:rdfsolve.parser:Chunked entity count: chunk 104, rows=300, total=31200
2025-12-02 11:53:15 DEBUG rdfsolve.parser: Chunked entity count: chunk 105, rows=300, total=31500
DEBUG:rdfsolve.parser:Chunked entity count: chunk 105, rows=300, total=31500
2025-12-02 11:53:27 DEBUG rdfsolve.parser: Chunked entity count: chunk 106, rows=300, total=31800
DEBUG:rdfsolve.parser:Chunked entity count: chunk 106, rows=300, total=31800
2025-12-02 11:53:39 DEBUG rdfsolve.parser: Chunked entity count: chunk 107, rows=300, total=32100
DEBUG:rdfsolve.parser:Chunked entity count: chunk 107, rows=300, total=32100
2025-12-02 11:53:50 DEBUG rdfsolve.parser: Chunked entity count: chunk 108, rows=300, total=32400
DEBUG:rdfsolve.parser:Chunked entity count: chunk 108, rows=300, total=32400
2025-12-02 11:54:02 DEBUG rdfsolve.parser: Chunked entity count: chunk 109, rows=300, total=32700
DEBUG:rdfsolve.parser:Chunked entity count: chunk 109, rows=300, total=32700
2025-12-02 11:54:13 DEBUG rdfsolve.parser: Chunked entity count: chunk 110, rows=300, total=33000
DEBUG:rdfsolve.parser:Chunked entity count: chunk 110, rows=300, total=33000
2025-12-02 11:54:25 DEBUG rdfsolve.parser: Chunked entity count: chunk 111, rows=300, total=33300
DEBUG:rdfsolve.parser:Chunked entity count: chunk 111, rows=300, total=33300
2025-12-02 11:54:37 DEBUG rdfsolve.parser: Chunked entity count: chunk 112, rows=300, total=33600
DEBUG:rdfsolve.parser:Chunked entity count: chunk 112, rows=300, total=33600
2025-12-02 11:54:48 DEBUG rdfsolve.parser: Chunked entity count: chunk 113, rows=300, total=33900
DEBUG:rdfsolve.parser:Chunked entity count: chunk 113, rows=300, total=33900
2025-12-02 11:55:00 DEBUG rdfsolve.parser: Chunked entity count: chunk 114, rows=300, total=34200
DEBUG:rdfsolve.parser:Chunked entity count: chunk 114, rows=300, total=34200
2025-12-02 11:55:11 DEBUG rdfsolve.parser: Chunked entity count: chunk 115, rows=300, total=34500
DEBUG:rdfsolve.parser:Chunked entity count: chunk 115, rows=300, total=34500
2025-12-02 11:55:23 DEBUG rdfsolve.parser: Chunked entity count: chunk 116, rows=300, total=34800
DEBUG:rdfsolve.parser:Chunked entity count: chunk 116, rows=300, total=34800
2025-12-02 11:55:35 DEBUG rdfsolve.parser: Chunked entity count: chunk 117, rows=300, total=35100
DEBUG:rdfsolve.parser:Chunked entity count: chunk 117, rows=300, total=35100
2025-12-02 11:55:46 DEBUG rdfsolve.parser: Chunked entity count: chunk 118, rows=300, total=35400
DEBUG:rdfsolve.parser:Chunked entity count: chunk 118, rows=300, total=35400
2025-12-02 11:55:58 DEBUG rdfsolve.parser: Chunked entity count: chunk 119, rows=300, total=35700
DEBUG:rdfsolve.parser:Chunked entity count: chunk 119, rows=300, total=35700
2025-12-02 11:56:10 DEBUG rdfsolve.parser: Chunked entity count: chunk 120, rows=300, total=36000
DEBUG:rdfsolve.parser:Chunked entity count: chunk 120, rows=300, total=36000
2025-12-02 11:56:21 DEBUG rdfsolve.parser: Chunked entity count: chunk 121, rows=300, total=36300
DEBUG:rdfsolve.parser:Chunked entity count: chunk 121, rows=300, total=36300
2025-12-02 11:56:33 DEBUG rdfsolve.parser: Chunked entity count: chunk 122, rows=300, total=36600
DEBUG:rdfsolve.parser:Chunked entity count: chunk 122, rows=300, total=36600
2025-12-02 11:56:44 DEBUG rdfsolve.parser: Chunked entity count: chunk 123, rows=300, total=36900
DEBUG:rdfsolve.parser:Chunked entity count: chunk 123, rows=300, total=36900
2025-12-02 11:56:56 DEBUG rdfsolve.parser: Chunked entity count: chunk 124, rows=300, total=37200
DEBUG:rdfsolve.parser:Chunked entity count: chunk 124, rows=300, total=37200
2025-12-02 11:57:08 DEBUG rdfsolve.parser: Chunked entity count: chunk 125, rows=300, total=37500
DEBUG:rdfsolve.parser:Chunked entity count: chunk 125, rows=300, total=37500
2025-12-02 11:57:19 DEBUG rdfsolve.parser: Chunked entity count: chunk 126, rows=300, total=37800
DEBUG:rdfsolve.parser:Chunked entity count: chunk 126, rows=300, total=37800
2025-12-02 11:57:31 DEBUG rdfsolve.parser: Chunked entity count: chunk 127, rows=300, total=38100
DEBUG:rdfsolve.parser:Chunked entity count: chunk 127, rows=300, total=38100
2025-12-02 11:57:42 DEBUG rdfsolve.parser: Chunked entity count: chunk 128, rows=300, total=38400
DEBUG:rdfsolve.parser:Chunked entity count: chunk 128, rows=300, total=38400
2025-12-02 11:57:54 DEBUG rdfsolve.parser: Chunked entity count: chunk 129, rows=300, total=38700
DEBUG:rdfsolve.parser:Chunked entity count: chunk 129, rows=300, total=38700
2025-12-02 11:58:06 DEBUG rdfsolve.parser: Chunked entity count: chunk 130, rows=300, total=39000
DEBUG:rdfsolve.parser:Chunked entity count: chunk 130, rows=300, total=39000
2025-12-02 11:58:17 DEBUG rdfsolve.parser: Chunked entity count: chunk 131, rows=300, total=39300
DEBUG:rdfsolve.parser:Chunked entity count: chunk 131, rows=300, total=39300
2025-12-02 11:58:29 DEBUG rdfsolve.parser: Chunked entity count: chunk 132, rows=300, total=39600
DEBUG:rdfsolve.parser:Chunked entity count: chunk 132, rows=300, total=39600
2025-12-02 11:58:40 DEBUG rdfsolve.parser: Chunked entity count: chunk 133, rows=300, total=39900
DEBUG:rdfsolve.parser:Chunked entity count: chunk 133, rows=300, total=39900
2025-12-02 11:58:52 DEBUG rdfsolve.parser: Chunked entity count: chunk 134, rows=300, total=40200
DEBUG:rdfsolve.parser:Chunked entity count: chunk 134, rows=300, total=40200
2025-12-02 11:59:04 DEBUG rdfsolve.parser: Chunked entity count: chunk 135, rows=300, total=40500
DEBUG:rdfsolve.parser:Chunked entity count: chunk 135, rows=300, total=40500
2025-12-02 11:59:15 DEBUG rdfsolve.parser: Chunked entity count: chunk 136, rows=300, total=40800
DEBUG:rdfsolve.parser:Chunked entity count: chunk 136, rows=300, total=40800
2025-12-02 11:59:27 DEBUG rdfsolve.parser: Chunked entity count: chunk 137, rows=300, total=41100
DEBUG:rdfsolve.parser:Chunked entity count: chunk 137, rows=300, total=41100
2025-12-02 11:59:38 DEBUG rdfsolve.parser: Chunked entity count: chunk 138, rows=300, total=41400
DEBUG:rdfsolve.parser:Chunked entity count: chunk 138, rows=300, total=41400
2025-12-02 11:59:50 DEBUG rdfsolve.parser: Chunked entity count: chunk 139, rows=300, total=41700
DEBUG:rdfsolve.parser:Chunked entity count: chunk 139, rows=300, total=41700
2025-12-02 12:00:01 DEBUG rdfsolve.parser: Chunked entity count: chunk 140, rows=300, total=42000
DEBUG:rdfsolve.parser:Chunked entity count: chunk 140, rows=300, total=42000
2025-12-02 12:00:13 DEBUG rdfsolve.parser: Chunked entity count: chunk 141, rows=300, total=42300
DEBUG:rdfsolve.parser:Chunked entity count: chunk 141, rows=300, total=42300
2025-12-02 12:00:25 DEBUG rdfsolve.parser: Chunked entity count: chunk 142, rows=300, total=42600
DEBUG:rdfsolve.parser:Chunked entity count: chunk 142, rows=300, total=42600
2025-12-02 12:00:36 DEBUG rdfsolve.parser: Chunked entity count: chunk 143, rows=300, total=42900
DEBUG:rdfsolve.parser:Chunked entity count: chunk 143, rows=300, total=42900
2025-12-02 12:00:48 DEBUG rdfsolve.parser: Chunked entity count: chunk 144, rows=300, total=43200
DEBUG:rdfsolve.parser:Chunked entity count: chunk 144, rows=300, total=43200
2025-12-02 12:01:00 DEBUG rdfsolve.parser: Chunked entity count: chunk 145, rows=300, total=43500
DEBUG:rdfsolve.parser:Chunked entity count: chunk 145, rows=300, total=43500
2025-12-02 12:01:11 DEBUG rdfsolve.parser: Chunked entity count: chunk 146, rows=300, total=43800
DEBUG:rdfsolve.parser:Chunked entity count: chunk 146, rows=300, total=43800
2025-12-02 12:01:23 DEBUG rdfsolve.parser: Chunked entity count: chunk 147, rows=300, total=44100
DEBUG:rdfsolve.parser:Chunked entity count: chunk 147, rows=300, total=44100
2025-12-02 12:01:35 DEBUG rdfsolve.parser: Chunked entity count: chunk 148, rows=300, total=44400
DEBUG:rdfsolve.parser:Chunked entity count: chunk 148, rows=300, total=44400
2025-12-02 12:01:46 DEBUG rdfsolve.parser: Chunked entity count: chunk 149, rows=300, total=44700
DEBUG:rdfsolve.parser:Chunked entity count: chunk 149, rows=300, total=44700
2025-12-02 12:01:58 DEBUG rdfsolve.parser: Chunked entity count: chunk 150, rows=300, total=45000
DEBUG:rdfsolve.parser:Chunked entity count: chunk 150, rows=300, total=45000
2025-12-02 12:02:10 DEBUG rdfsolve.parser: Chunked entity count: chunk 151, rows=300, total=45300
DEBUG:rdfsolve.parser:Chunked entity count: chunk 151, rows=300, total=45300
2025-12-02 12:02:21 DEBUG rdfsolve.parser: Chunked entity count: chunk 152, rows=300, total=45600
DEBUG:rdfsolve.parser:Chunked entity count: chunk 152, rows=300, total=45600
2025-12-02 12:02:33 DEBUG rdfsolve.parser: Chunked entity count: chunk 153, rows=300, total=45900
DEBUG:rdfsolve.parser:Chunked entity count: chunk 153, rows=300, total=45900
2025-12-02 12:02:44 DEBUG rdfsolve.parser: Chunked entity count: chunk 154, rows=300, total=46200
DEBUG:rdfsolve.parser:Chunked entity count: chunk 154, rows=300, total=46200
2025-12-02 12:02:56 DEBUG rdfsolve.parser: Chunked entity count: chunk 155, rows=300, total=46500
DEBUG:rdfsolve.parser:Chunked entity count: chunk 155, rows=300, total=46500
2025-12-02 12:03:08 DEBUG rdfsolve.parser: Chunked entity count: chunk 156, rows=300, total=46800
DEBUG:rdfsolve.parser:Chunked entity count: chunk 156, rows=300, total=46800
2025-12-02 12:03:19 DEBUG rdfsolve.parser: Chunked entity count: chunk 157, rows=300, total=47100
DEBUG:rdfsolve.parser:Chunked entity count: chunk 157, rows=300, total=47100
2025-12-02 12:03:31 DEBUG rdfsolve.parser: Chunked entity count: chunk 158, rows=300, total=47400
DEBUG:rdfsolve.parser:Chunked entity count: chunk 158, rows=300, total=47400
2025-12-02 12:03:43 DEBUG rdfsolve.parser: Chunked entity count: chunk 159, rows=300, total=47700
DEBUG:rdfsolve.parser:Chunked entity count: chunk 159, rows=300, total=47700
2025-12-02 12:03:54 DEBUG rdfsolve.parser: Chunked entity count: chunk 160, rows=300, total=48000
DEBUG:rdfsolve.parser:Chunked entity count: chunk 160, rows=300, total=48000
2025-12-02 12:04:06 DEBUG rdfsolve.parser: Chunked entity count: chunk 161, rows=300, total=48300
DEBUG:rdfsolve.parser:Chunked entity count: chunk 161, rows=300, total=48300
2025-12-02 12:04:17 DEBUG rdfsolve.parser: Chunked entity count: chunk 162, rows=300, total=48600
DEBUG:rdfsolve.parser:Chunked entity count: chunk 162, rows=300, total=48600
2025-12-02 12:04:29 DEBUG rdfsolve.parser: Chunked entity count: chunk 163, rows=300, total=48900
DEBUG:rdfsolve.parser:Chunked entity count: chunk 163, rows=300, total=48900
2025-12-02 12:04:41 DEBUG rdfsolve.parser: Chunked entity count: chunk 164, rows=300, total=49200
DEBUG:rdfsolve.parser:Chunked entity count: chunk 164, rows=300, total=49200
2025-12-02 12:04:52 DEBUG rdfsolve.parser: Chunked entity count: chunk 165, rows=300, total=49500
DEBUG:rdfsolve.parser:Chunked entity count: chunk 165, rows=300, total=49500
2025-12-02 12:05:04 DEBUG rdfsolve.parser: Chunked entity count: chunk 166, rows=300, total=49800
DEBUG:rdfsolve.parser:Chunked entity count: chunk 166, rows=300, total=49800
2025-12-02 12:05:16 DEBUG rdfsolve.parser: Chunked entity count: chunk 167, rows=300, total=50100
DEBUG:rdfsolve.parser:Chunked entity count: chunk 167, rows=300, total=50100
2025-12-02 12:05:27 DEBUG rdfsolve.parser: Chunked entity count: chunk 168, rows=300, total=50400
DEBUG:rdfsolve.parser:Chunked entity count: chunk 168, rows=300, total=50400
2025-12-02 12:05:39 DEBUG rdfsolve.parser: Chunked entity count: chunk 169, rows=300, total=50700
DEBUG:rdfsolve.parser:Chunked entity count: chunk 169, rows=300, total=50700
2025-12-02 12:05:51 DEBUG rdfsolve.parser: Chunked entity count: chunk 170, rows=300, total=51000
DEBUG:rdfsolve.parser:Chunked entity count: chunk 170, rows=300, total=51000
2025-12-02 12:06:02 DEBUG rdfsolve.parser: Chunked entity count: chunk 171, rows=300, total=51300
DEBUG:rdfsolve.parser:Chunked entity count: chunk 171, rows=300, total=51300
2025-12-02 12:06:14 DEBUG rdfsolve.parser: Chunked entity count: chunk 172, rows=300, total=51600
DEBUG:rdfsolve.parser:Chunked entity count: chunk 172, rows=300, total=51600
2025-12-02 12:06:25 DEBUG rdfsolve.parser: Chunked entity count: chunk 173, rows=300, total=51900
DEBUG:rdfsolve.parser:Chunked entity count: chunk 173, rows=300, total=51900
2025-12-02 12:06:37 DEBUG rdfsolve.parser: Chunked entity count: chunk 174, rows=300, total=52200
DEBUG:rdfsolve.parser:Chunked entity count: chunk 174, rows=300, total=52200
2025-12-02 12:06:49 DEBUG rdfsolve.parser: Chunked entity count: chunk 175, rows=300, total=52500
DEBUG:rdfsolve.parser:Chunked entity count: chunk 175, rows=300, total=52500
2025-12-02 12:07:01 DEBUG rdfsolve.parser: Chunked entity count: chunk 176, rows=300, total=52800
DEBUG:rdfsolve.parser:Chunked entity count: chunk 176, rows=300, total=52800
2025-12-02 12:07:12 DEBUG rdfsolve.parser: Chunked entity count: chunk 177, rows=300, total=53100
DEBUG:rdfsolve.parser:Chunked entity count: chunk 177, rows=300, total=53100
2025-12-02 12:07:24 DEBUG rdfsolve.parser: Chunked entity count: chunk 178, rows=300, total=53400
DEBUG:rdfsolve.parser:Chunked entity count: chunk 178, rows=300, total=53400
2025-12-02 12:07:36 DEBUG rdfsolve.parser: Chunked entity count: chunk 179, rows=300, total=53700
DEBUG:rdfsolve.parser:Chunked entity count: chunk 179, rows=300, total=53700
2025-12-02 12:07:47 DEBUG rdfsolve.parser: Chunked entity count: chunk 180, rows=300, total=54000
DEBUG:rdfsolve.parser:Chunked entity count: chunk 180, rows=300, total=54000
2025-12-02 12:07:59 DEBUG rdfsolve.parser: Chunked entity count: chunk 181, rows=300, total=54300
DEBUG:rdfsolve.parser:Chunked entity count: chunk 181, rows=300, total=54300
2025-12-02 12:08:11 DEBUG rdfsolve.parser: Chunked entity count: chunk 182, rows=300, total=54600
DEBUG:rdfsolve.parser:Chunked entity count: chunk 182, rows=300, total=54600
2025-12-02 12:08:23 DEBUG rdfsolve.parser: Chunked entity count: chunk 183, rows=300, total=54900
DEBUG:rdfsolve.parser:Chunked entity count: chunk 183, rows=300, total=54900
2025-12-02 12:08:34 DEBUG rdfsolve.parser: Chunked entity count: chunk 184, rows=300, total=55200
DEBUG:rdfsolve.parser:Chunked entity count: chunk 184, rows=300, total=55200
2025-12-02 12:08:46 DEBUG rdfsolve.parser: Chunked entity count: chunk 185, rows=300, total=55500
DEBUG:rdfsolve.parser:Chunked entity count: chunk 185, rows=300, total=55500
2025-12-02 12:08:58 DEBUG rdfsolve.parser: Chunked entity count: chunk 186, rows=300, total=55800
DEBUG:rdfsolve.parser:Chunked entity count: chunk 186, rows=300, total=55800
2025-12-02 12:09:09 DEBUG rdfsolve.parser: Chunked entity count: chunk 187, rows=300, total=56100
DEBUG:rdfsolve.parser:Chunked entity count: chunk 187, rows=300, total=56100
2025-12-02 12:09:21 DEBUG rdfsolve.parser: Chunked entity count: chunk 188, rows=300, total=56400
DEBUG:rdfsolve.parser:Chunked entity count: chunk 188, rows=300, total=56400
2025-12-02 12:09:33 DEBUG rdfsolve.parser: Chunked entity count: chunk 189, rows=300, total=56700
DEBUG:rdfsolve.parser:Chunked entity count: chunk 189, rows=300, total=56700
2025-12-02 12:09:44 DEBUG rdfsolve.parser: Chunked entity count: chunk 190, rows=300, total=57000
DEBUG:rdfsolve.parser:Chunked entity count: chunk 190, rows=300, total=57000
2025-12-02 12:09:56 DEBUG rdfsolve.parser: Chunked entity count: chunk 191, rows=300, total=57300
DEBUG:rdfsolve.parser:Chunked entity count: chunk 191, rows=300, total=57300
2025-12-02 12:10:08 DEBUG rdfsolve.parser: Chunked entity count: chunk 192, rows=300, total=57600
DEBUG:rdfsolve.parser:Chunked entity count: chunk 192, rows=300, total=57600
2025-12-02 12:10:20 DEBUG rdfsolve.parser: Chunked entity count: chunk 193, rows=300, total=57900
DEBUG:rdfsolve.parser:Chunked entity count: chunk 193, rows=300, total=57900
2025-12-02 12:10:31 DEBUG rdfsolve.parser: Chunked entity count: chunk 194, rows=300, total=58200
DEBUG:rdfsolve.parser:Chunked entity count: chunk 194, rows=300, total=58200
2025-12-02 12:10:43 DEBUG rdfsolve.parser: Chunked entity count: chunk 195, rows=300, total=58500
DEBUG:rdfsolve.parser:Chunked entity count: chunk 195, rows=300, total=58500
2025-12-02 12:10:54 DEBUG rdfsolve.parser: Chunked entity count: chunk 196, rows=300, total=58800
DEBUG:rdfsolve.parser:Chunked entity count: chunk 196, rows=300, total=58800
2025-12-02 12:11:06 DEBUG rdfsolve.parser: Chunked entity count: chunk 197, rows=300, total=59100
DEBUG:rdfsolve.parser:Chunked entity count: chunk 197, rows=300, total=59100
2025-12-02 12:11:18 DEBUG rdfsolve.parser: Chunked entity count: chunk 198, rows=300, total=59400
DEBUG:rdfsolve.parser:Chunked entity count: chunk 198, rows=300, total=59400
2025-12-02 12:11:30 DEBUG rdfsolve.parser: Chunked entity count: chunk 199, rows=300, total=59700
DEBUG:rdfsolve.parser:Chunked entity count: chunk 199, rows=300, total=59700
2025-12-02 12:11:42 DEBUG rdfsolve.parser: Chunked entity count: chunk 200, rows=300, total=60000
DEBUG:rdfsolve.parser:Chunked entity count: chunk 200, rows=300, total=60000
2025-12-02 12:11:54 DEBUG rdfsolve.parser: Chunked entity count: chunk 201, rows=300, total=60300
DEBUG:rdfsolve.parser:Chunked entity count: chunk 201, rows=300, total=60300
2025-12-02 12:12:05 DEBUG rdfsolve.parser: Chunked entity count: chunk 202, rows=300, total=60600
DEBUG:rdfsolve.parser:Chunked entity count: chunk 202, rows=300, total=60600
2025-12-02 12:12:17 DEBUG rdfsolve.parser: Chunked entity count: chunk 203, rows=300, total=60900
DEBUG:rdfsolve.parser:Chunked entity count: chunk 203, rows=300, total=60900
2025-12-02 12:12:29 DEBUG rdfsolve.parser: Chunked entity count: chunk 204, rows=300, total=61200
DEBUG:rdfsolve.parser:Chunked entity count: chunk 204, rows=300, total=61200
2025-12-02 12:12:41 DEBUG rdfsolve.parser: Chunked entity count: chunk 205, rows=300, total=61500
DEBUG:rdfsolve.parser:Chunked entity count: chunk 205, rows=300, total=61500
2025-12-02 12:12:53 DEBUG rdfsolve.parser: Chunked entity count: chunk 206, rows=300, total=61800
DEBUG:rdfsolve.parser:Chunked entity count: chunk 206, rows=300, total=61800
2025-12-02 12:13:04 DEBUG rdfsolve.parser: Chunked entity count: chunk 207, rows=300, total=62100
DEBUG:rdfsolve.parser:Chunked entity count: chunk 207, rows=300, total=62100
2025-12-02 12:13:16 DEBUG rdfsolve.parser: Chunked entity count: chunk 208, rows=300, total=62400
DEBUG:rdfsolve.parser:Chunked entity count: chunk 208, rows=300, total=62400
2025-12-02 12:13:28 DEBUG rdfsolve.parser: Chunked entity count: chunk 209, rows=300, total=62700
DEBUG:rdfsolve.parser:Chunked entity count: chunk 209, rows=300, total=62700
2025-12-02 12:13:40 DEBUG rdfsolve.parser: Chunked entity count: chunk 210, rows=300, total=63000
DEBUG:rdfsolve.parser:Chunked entity count: chunk 210, rows=300, total=63000
2025-12-02 12:13:51 DEBUG rdfsolve.parser: Chunked entity count: chunk 211, rows=300, total=63300
DEBUG:rdfsolve.parser:Chunked entity count: chunk 211, rows=300, total=63300
2025-12-02 12:14:03 DEBUG rdfsolve.parser: Chunked entity count: chunk 212, rows=300, total=63600
DEBUG:rdfsolve.parser:Chunked entity count: chunk 212, rows=300, total=63600
2025-12-02 12:14:15 DEBUG rdfsolve.parser: Chunked entity count: chunk 213, rows=300, total=63900
DEBUG:rdfsolve.parser:Chunked entity count: chunk 213, rows=300, total=63900
2025-12-02 12:14:27 DEBUG rdfsolve.parser: Chunked entity count: chunk 214, rows=300, total=64200
DEBUG:rdfsolve.parser:Chunked entity count: chunk 214, rows=300, total=64200
2025-12-02 12:14:38 DEBUG rdfsolve.parser: Chunked entity count: chunk 215, rows=300, total=64500
DEBUG:rdfsolve.parser:Chunked entity count: chunk 215, rows=300, total=64500
2025-12-02 12:14:50 DEBUG rdfsolve.parser: Chunked entity count: chunk 216, rows=300, total=64800
DEBUG:rdfsolve.parser:Chunked entity count: chunk 216, rows=300, total=64800
2025-12-02 12:15:02 DEBUG rdfsolve.parser: Chunked entity count: chunk 217, rows=300, total=65100
DEBUG:rdfsolve.parser:Chunked entity count: chunk 217, rows=300, total=65100
2025-12-02 12:15:14 DEBUG rdfsolve.parser: Chunked entity count: chunk 218, rows=300, total=65400
DEBUG:rdfsolve.parser:Chunked entity count: chunk 218, rows=300, total=65400
2025-12-02 12:15:26 DEBUG rdfsolve.parser: Chunked entity count: chunk 219, rows=300, total=65700
DEBUG:rdfsolve.parser:Chunked entity count: chunk 219, rows=300, total=65700
2025-12-02 12:15:37 DEBUG rdfsolve.parser: Chunked entity count: chunk 220, rows=300, total=66000
DEBUG:rdfsolve.parser:Chunked entity count: chunk 220, rows=300, total=66000
2025-12-02 12:15:49 DEBUG rdfsolve.parser: Chunked entity count: chunk 221, rows=300, total=66300
DEBUG:rdfsolve.parser:Chunked entity count: chunk 221, rows=300, total=66300
2025-12-02 12:16:01 DEBUG rdfsolve.parser: Chunked entity count: chunk 222, rows=300, total=66600
DEBUG:rdfsolve.parser:Chunked entity count: chunk 222, rows=300, total=66600
2025-12-02 12:16:12 DEBUG rdfsolve.parser: Chunked entity count: chunk 223, rows=300, total=66900
DEBUG:rdfsolve.parser:Chunked entity count: chunk 223, rows=300, total=66900
2025-12-02 12:16:24 DEBUG rdfsolve.parser: Chunked entity count: chunk 224, rows=300, total=67200
DEBUG:rdfsolve.parser:Chunked entity count: chunk 224, rows=300, total=67200
2025-12-02 12:16:36 DEBUG rdfsolve.parser: Chunked entity count: chunk 225, rows=300, total=67500
DEBUG:rdfsolve.parser:Chunked entity count: chunk 225, rows=300, total=67500
2025-12-02 12:16:48 DEBUG rdfsolve.parser: Chunked entity count: chunk 226, rows=300, total=67800
DEBUG:rdfsolve.parser:Chunked entity count: chunk 226, rows=300, total=67800
2025-12-02 12:17:00 DEBUG rdfsolve.parser: Chunked entity count: chunk 227, rows=300, total=68100
DEBUG:rdfsolve.parser:Chunked entity count: chunk 227, rows=300, total=68100
2025-12-02 12:17:11 DEBUG rdfsolve.parser: Chunked entity count: chunk 228, rows=300, total=68400
DEBUG:rdfsolve.parser:Chunked entity count: chunk 228, rows=300, total=68400
2025-12-02 12:17:23 DEBUG rdfsolve.parser: Chunked entity count: chunk 229, rows=300, total=68700
DEBUG:rdfsolve.parser:Chunked entity count: chunk 229, rows=300, total=68700
2025-12-02 12:17:35 DEBUG rdfsolve.parser: Chunked entity count: chunk 230, rows=300, total=69000
DEBUG:rdfsolve.parser:Chunked entity count: chunk 230, rows=300, total=69000
2025-12-02 12:17:47 DEBUG rdfsolve.parser: Chunked entity count: chunk 231, rows=300, total=69300
DEBUG:rdfsolve.parser:Chunked entity count: chunk 231, rows=300, total=69300
2025-12-02 12:17:58 DEBUG rdfsolve.parser: Chunked entity count: chunk 232, rows=59, total=69359
DEBUG:rdfsolve.parser:Chunked entity count: chunk 232, rows=59, total=69359
2025-12-02 12:17:58 INFO rdfsolve.parser: Chunked entity counting complete: 232 chunks, 69359 total results
INFO:rdfsolve.parser:Chunked entity counting complete: 232 chunks, 69359 total results
2025-12-02 13:10:38 INFO rdfsolve.parser: Total instances collected: 2230412
INFO:rdfsolve.parser:Total instances collected: 2230412
Cached data to: /home/runner/work/rdfsolve/rdfsolve/notebooks/01_schema_extraction/../../docs/data/schema_extraction/pubchem.protein/cache/pubchem.protein_frequencies_with_instances.pkl
Frequencies DataFrame: 42218 shapes
Memory usage - Instances: 31610.8 KB
In [11]:
import pandas as pd
import plotly.graph_objects as go

if not frequencies_with_instances_df.empty:
    df = frequencies_with_instances_df.copy()
    df["coverage_percent"] = pd.to_numeric(df["coverage_percent"], errors="coerce").fillna(0)
    df = df.sort_values("coverage_percent", ascending=False).reset_index(drop=True)

    def make_label(row):
        return (
            f"<b>{row['subject_class']}</b> "
            f"<span style='color:#888;'></span> "
            f"<i>{row['property']}</i> "
            f"<span style='color:#888;'></span> "
            f"<b>{row['object_class']}</b>"
        )

    df["styled_label"] = df.apply(make_label, axis=1)

    text_positions = ["outside" if v < 95 else "inside" for v in df["coverage_percent"]]
    custom_colorscale = [
        [0.0, "#d36e61"],
        [0.4, "#e5cdbd"],
        [0.7, "#e8e4cf"],
        [1.0, "#c3d9c0"],
    ]

    # Figure sizing
    bar_height = 26
    fig_height = min(2000, bar_height * len(df) + 200)

    fig = go.Figure(
        go.Bar(
            x=df["coverage_percent"],
            y=df["styled_label"],
            orientation="h",
            text=[f"{v:.1f}%" for v in df["coverage_percent"]],
            textposition=text_positions,
            marker={
                "color": df["coverage_percent"],
                "colorscale": custom_colorscale,
                "cmin": 0,
                "cmax": 100,
                "line": {"color": "white", "width": 0.6},
            },
            hovertemplate="<b>%{y}</b><br>Coverage: %{x:.1f}%<extra></extra>",
        )
    )

    fig.update_layout(
        title={
            "text": f"Schema Pattern Coverage for {dataset_name}",
            "x": 0.5,
            "font": {"size": 18},
        },
        xaxis={
            "title": "Coverage (%)",
            "range": [0, 100],  # fixed x-axis range
            "ticksuffix": "%",
            "showgrid": True,
            "gridcolor": "rgba(220,220,220,0.3)",
        },
        yaxis={
            "title": "",
            "autorange": "reversed",
            "automargin": True,
            "fixedrange": False,  # allow vertical zoom/pan
        },
        template="plotly_white",
        autosize=True,  # allow figure to scale with container
        height=fig_height,  # base height (will scale)
        margin={"t": 80, "b": 50, "l": 480, "r": 150},  # extra right margin for text
        plot_bgcolor="white",
        paper_bgcolor="white",
    )

    # Disable horizontal zoom/pan
    fig.update_xaxes(fixedrange=True)

    # Show figure with config for HTML export compatibility
    fig.show(
        config={
            "scrollZoom": True,
            "responsive": True,
            "toImageButtonOptions": {
                "format": "png",
                "filename": f"{dataset_name}_schema_coverage",
                "height": fig_height,
                "width": 600,
                "scale": 1,
            },
        }
    )

else:
    display(Markdown("**No coverage data to visualize**"))

LinkML (derived from JSON-LD)¶

In [12]:
# Generate LinkML directly from JSON-LD with custom schema URI
print("Regenerating LinkML schema from JSON-LD with custom schema URI...")

schema_name = f"{dataset_name}_schema"
custom_schema_uri = (
    f"http://jmillanacosta.github.io/rdfsolve/{dataset_name}/linkml"  # User-definable base URI
)

yaml_text = vp.to_linkml_yaml(
    schema_name=schema_name,
    schema_description=f"LinkML schema for {dataset_name} generated from JSON-LD",
    schema_base_uri=custom_schema_uri,
    filter_void_nodes=True,
)

# Save to LinkML YAML
linkml_file = os.path.join(exports_path, f"{dataset_name}_linkml_schema.yaml")
with open(linkml_file, "w", encoding="utf-8") as f:
    f.write(yaml_text)

print(f"LinkML YAML saved to: {linkml_file}")
Regenerating LinkML schema from JSON-LD with custom schema URI...
LinkML YAML saved to: /home/runner/work/rdfsolve/rdfsolve/notebooks/01_schema_extraction/../../docs/data/schema_extraction/pubchem.protein/pubchem.protein_linkml_schema.yaml
In [13]:
from linkml.generators.erdiagramgen import ERDiagramGenerator
from linkml_runtime.utils.schemaview import SchemaView

sv = SchemaView(linkml_file)
linkml_schema = sv.schema

display(
    Markdown(
        f"**Parsed LinkML schema:** Classes = {len(sv.all_classes())}, Slots = {len(sv.all_slots())}"
    )
)

# Build and display a Mermaid class diagram for the aopwikirdf LinkedML
mermaid_code = ERDiagramGenerator(linkml_file).serialize()

display(Markdown(mermaid_code))

Parsed LinkML schema: Classes = 202, Slots = 1

erDiagram
BiopaxleveProtein {

}
Chebi17089 {

}
Pr000000001 {

}
Pr000000176 {

}
Pr000000814 {

}
Pr000001173 {

}
Pr000001206 {

}
Pr000001398 {

}
Pr000001587 {

}
Pr000001632 {

}
Pr000001654 {

}
Pr000001703 {

}
Pr000001961 {

}
Pr000001984 {

}
Pr000002103 {

}
Pr000003059 {

}
Pr000003064 {

}
Pr000003202 {

}
Pr000003270 {

}
Pr000003271 {

}
Pr000003416 {

}
Pr000003636 {

}
Pr000003676 {

}
Pr000003791 {

}
Pr000003906 {

}
Pr000004055 {

}
Pr000004105 {

}
Pr000004415 {

}
Pr000004515 {

}
Pr000004516 {

}
Pr000004712 {

}
Pr000005257 {

}
Pr000005584 {

}
Pr000005813 {

}
Pr000005950 {

}
Pr000006132 {

}
Pr000006133 {

}
Pr000007124 {

}
Pr000008457 {

}
Pr000008556 {

}
Pr000008615 {

}
Pr000008811 {

}
Pr000009267 {

}
Pr000009439 {

}
Pr000009771 {

}
Pr000009802 {

}
Pr000010165 {

}
Pr000010229 {

}
Pr000010268 {

}
Pr000010415 {

}
Pr000011334 {

}
Pr000011416 {

}
Pr000012466 {

}
Pr000012467 {

}
Pr000012479 {

}
Pr000012745 {

}
Pr000013479 {

}
Pr000014284 {

}
Pr000014373 {

}
Pr000015167 {

}
Pr000016481 {

}
Pr000016695 {

}
Pr000017300 {

}
Pr000023005 {

}
Pr000032150 {

}
Pr000032431 {

}
Pr000035207 {

}
PrA2ASI5 {

}
PrA5A4K9 {

}
PrB0BNE5 {

}
PrB0V2N1 {

}
PrD3ZTE0 {

}
PrE9Q4S1 {

}
PrF1M391 {

}
PrO15164 {

}
PrO15438 {

}
PrO15552 {

}
PrO35881 {

}
PrO55023 {

}
PrO95838 {

}
PrP00356 {

}
PrP00439 {

}
PrP00760 {

}
PrP01112 {

}
PrP04800 {

}
PrP08276 {

}
PrP09874 {

}
PrP0AG80 {

}
PrP10064 {

}
PrP11157 {

}
PrP13639 {

}
PrP15145 {

}
PrP15385 {

}
PrP15539 {

}
PrP16390 {

}
PrP19205 {

}
PrP19785 {

}
PrP20420 {

}
PrP20612 {

}
PrP21803 {

}
PrP22888 {

}
PrP23141 {

}
PrP24524 {

}
PrP26897 {

}
PrP28843 {

}
PrP31041 {

}
PrP32501 {

}
PrP41180 {

}
PrP42681 {

}
PrP42858 {

}
PrP43003 {

}
PrP43352 {

}
PrP43354 {

}
PrP46406 {

}
PrP47196 {

}
PrP48169 {

}
PrP48442 {

}
PrP48443 {

}
PrP48544 {

}
PrP48730 {

}
PrP49657 {

}
PrP50114 {

}
PrP50571 {

}
PrP51577 {

}
PrP51617 {

}
PrP51788 {

}
PrP51880 {

}
PrP52186 {

}
PrP53778 {

}
PrP55011 {

}
PrP56221 {

}
PrP56657 {

}
PrP56658 {

}
PrP56696 {

}
PrP61887 {

}
PrP70340 {

}
PrP70579 {

}
PrP70597 {

}
PrP97931 {

}
PrP98106 {

}
PrQ01064 {

}
PrQ05438 {

}
PrQ05816 {

}
PrQ05940 {

}
PrQ07973 {

}
PrQ09128 {

}
PrQ14449 {

}
PrQ15119 {

}
PrQ15759 {

}
PrQ15831 {

}
PrQ28691 {

}
PrQ2MKA5 {

}
PrQ4G050 {

}
PrQ4G072 {

}
PrQ4V8J7 {

}
PrQ5I0E9 {

}
PrQ5XPT3 {

}
PrQ60613 {

}
PrQ61096 {

}
PrQ62413 {

}
PrQ62968 {

}
PrQ63484 {

}
PrQ6P8U6 {

}
PrQ6U1I9 {

}
PrQ7ZUC7 {

}
PrQ80W21 {

}
PrQ86UX6 {

}
PrQ8BYR2 {

}
PrQ8CA95 {

}
PrQ8CIN4 {

}
PrQ8CJF9 {

}
PrQ8JH70 {

}
PrQ8R429 {

}
PrQ8TCT1 {

}
PrQ90733 {

}
PrQ924T8 {

}
PrQ92993 {

}
PrQ96HU8 {

}
PrQ99JT2 {

}
PrQ9BZZ2 {

}
PrQ9D1P2 {

}
PrQ9D8I2 {

}
PrQ9H3R0 {

}
PrQ9H7Z7 {

}
PrQ9HB14 {

}
PrQ9JHF7 {

}
PrQ9NRG4 {

}
PrQ9QUR6 {

}
PrQ9QXX3 {

}
PrQ9R0P3 {

}
PrQ9R0S2 {

}
PrQ9R1E6 {

}
PrQ9UHI7 {

}
PrQ9UIQ6 {

}
PrQ9UM07 {

}
PrQ9WTR1 {

}
PrQ9WVJ0 {

}
PrQ9Y4K4 {

}
PrQ9Y5Z0 {

}
PrQ9Z0V1 {

}
Sio010043 {

}
VocabularyProtein {

}

BiopaxleveProtein ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Chebi17089 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000000001 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000000176 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000000814 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000001173 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000001206 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000001398 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000001587 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000001632 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000001654 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000001703 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000001961 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000001984 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000002103 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000003059 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000003064 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000003202 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000003270 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000003271 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000003416 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000003636 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000003676 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000003791 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000003906 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000004055 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000004105 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000004415 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000004515 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000004516 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000004712 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000005257 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000005584 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000005813 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000005950 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000006132 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000006133 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000007124 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000008457 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000008556 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000008615 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000008811 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000009267 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000009439 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000009771 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000009802 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000010165 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000010229 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000010268 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000010415 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000011334 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000011416 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000012466 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000012467 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000012479 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000012745 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000013479 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000014284 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000014373 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000015167 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000016481 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000016695 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000017300 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000023005 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000032150 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000032431 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Pr000035207 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrA2ASI5 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrA5A4K9 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrB0BNE5 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrB0V2N1 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrD3ZTE0 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrE9Q4S1 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrF1M391 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrO15164 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrO15438 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrO15552 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrO35881 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrO55023 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrO95838 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP00356 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP00439 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP00760 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP01112 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP04800 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP08276 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP09874 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP0AG80 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP10064 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP11157 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP13639 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP15145 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP15385 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP15539 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP16390 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP19205 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP19785 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP20420 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP20612 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP21803 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP22888 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP23141 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP24524 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP26897 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP28843 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP31041 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP32501 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP41180 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP42681 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP42858 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP43003 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP43352 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP43354 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP46406 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP47196 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP48169 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP48442 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP48443 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP48544 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP48730 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP49657 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP50114 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP50571 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP51577 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP51617 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP51788 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP51880 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP52186 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP53778 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP55011 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP56221 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP56657 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP56658 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP56696 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP61887 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP70340 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP70579 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP70597 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP97931 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrP98106 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ01064 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ05438 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ05816 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ05940 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ07973 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ09128 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ14449 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ15119 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ15759 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ15831 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ28691 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ2MKA5 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ4G050 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ4G072 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ4V8J7 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ5I0E9 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ5XPT3 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ60613 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ61096 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ62413 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ62968 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ63484 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ6P8U6 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ6U1I9 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ7ZUC7 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ80W21 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ86UX6 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ8BYR2 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ8CA95 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ8CIN4 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ8CJF9 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ8JH70 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ8R429 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ8TCT1 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ90733 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ924T8 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ92993 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ96HU8 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ99JT2 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ9BZZ2 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ9D1P2 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ9D8I2 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ9H3R0 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ9H7Z7 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ9HB14 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ9JHF7 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ9NRG4 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ9QUR6 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ9QXX3 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ9R0P3 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ9R0S2 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ9R1E6 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ9UHI7 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ9UIQ6 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ9UM07 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ9WTR1 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ9WVJ0 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ9Y4K4 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ9Y5Z0 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
PrQ9Z0V1 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
Sio010043 ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
VocabularyProtein ||--|o Pr000000001 : "vocabulary_has_Similar_Protein"
In [14]:
json_path = os.path.join(exports_path, f"{dataset_name}_schema.json")
csv_path = os.path.join(exports_path, f"{dataset_name}_schema.csv")

# Export CSV from frequencies
frequencies_df.to_csv(csv_path, index=False)

# Export JSON derived from JSON-LD (maintains consistency)
with open(json_path, "w", encoding="utf-8") as fh:
    json.dump(vp.to_json(filter_void_nodes=True), fh, indent=2)

print(f"CSV exported to: {csv_path}")
print(f"JSON exported to: {json_path}")
CSV exported to: /home/runner/work/rdfsolve/rdfsolve/notebooks/01_schema_extraction/../../docs/data/schema_extraction/pubchem.protein/pubchem.protein_schema.csv
JSON exported to: /home/runner/work/rdfsolve/rdfsolve/notebooks/01_schema_extraction/../../docs/data/schema_extraction/pubchem.protein/pubchem.protein_schema.json
In [15]:
# Export collected SPARQL queries as TTL
queries_path = os.path.join(exports_path, f"{dataset_name}_sparql_queries.ttl")
queries = SparqlHelper.get_collected_queries()

if queries:
    ttl_content = SparqlHelper.export_queries_as_ttl(
        output_file=queries_path,
        base_uri=f"https://github.com/jmillanacosta/rdfsolve/sparql/{dataset_name}/",
        dataset_name=dataset_name,
    )
    print(f"Exported {len(queries)} SPARQL queries to: {queries_path}")
else:
    print("No SPARQL queries were collected")
Exported 57028 SPARQL queries to: /home/runner/work/rdfsolve/rdfsolve/notebooks/01_schema_extraction/../../docs/data/schema_extraction/pubchem.protein/pubchem.protein_sparql_queries.ttl